###############################################################################
# Analysis script for the manuscript "The Outdoor Dust Information Node (ODIN)
# - Development and performance assessment of a low cost ambient dust sensor"
# submitted to Atmospheric Measurement Techniques. June 2015
###############################################################################
## Preamble ####
# Libraries
require(ggplot2)
## Loading required package: ggplot2
require(openair)
## Loading required package: openair
## Loading required package: lazyeval
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Loading required package: maps
require(reshape2)
## Loading required package: reshape2
require(gridExtra)
## Loading required package: gridExtra
## Loading required package: grid
# Seed
set.seed(1)
###############################################################################
## Baseline ####
raw_dust_data_baseline <- read.table("http://files.figshare.com/2099394/sharp_baseline_test.tsv",
header = T,
sep = "",
quote = "\"'",
dec = ".",
row.names=NULL)
# Scale Dust measurements to mV (digital scale is 0 - 1024 translates to 0 - 5000 mV)
for (i in (3:10)){
raw_dust_data_baseline[,i]<-raw_dust_data_baseline[,i]*5000/1024
}
# force GMT as the time zone to avoid openair issues with daylight saving switches
# The actual time zone is 'NZST'
raw_dust_data_baseline$date=as.POSIXct(paste(raw_dust_data_baseline$Date,raw_dust_data_baseline$Time),tz='GMT')
raw_dust_data_baseline$Time<-NULL
raw_dust_data_baseline$Date<-NULL
# Averages for 10 minutes
dust_data_baseline<-timeAverage(mydata = raw_dust_data_baseline,avg.time = '1 min',statistic = 'mean')
dust_summary_baseline <- lapply( dust_data_baseline[,(2:9)] , function(x) rbind( mean = mean(x),
sd = sd(x) ,
median = median(x),
minimum = min(x),
maximum = max(x),
s.size = length(x)))
dust_summary_baseline <- data.frame( dust_summary_baseline )
dust_summary_baseline
## Dust1 Dust2 Dust3 Dust4 Dust5
## mean 0.2433483 0.9878710 27.927505 0.9618687 8.685774
## sd 0.4921017 0.9020277 2.887067 0.9134975 1.176315
## median 0.0000000 0.9765625 28.076172 0.9765625 8.789062
## minimum 0.0000000 0.0000000 17.578125 0.0000000 4.882812
## maximum 3.6621094 4.8828125 39.062500 4.8828125 13.427734
## s.size 6779.0000000 6779.0000000 6779.000000 6779.0000000 6779.000000
## Dust6 Dust7 Dust8
## mean 4.4574842 10.971814 10.666990
## sd 0.9031375 1.193490 1.320700
## median 4.8828125 10.742188 10.742188
## minimum 0.0000000 6.103516 4.882812
## maximum 8.5449219 15.625000 15.869141
## s.size 6779.0000000 6779.000000 6779.000000
###############################################################################
## Temperature dependence ####
raw_dust_data_temperature <- read.table("http://files.figshare.com/2099396/sharp_baseline_temperature_Nov2014.tsv",
header = T,
sep = "",
quote = "\"'",
dec = ".",
row.names=NULL)
# Baseline estimates from previous tests
baseline<-as.numeric(dust_summary_baseline['mean',])
# Scale Dust measurements to mV (digital scale is 0 - 1024 translates to 0 - 5000 mV)
# Substract previous baseline data from the signal.
for (i in (3:10)){
raw_dust_data_temperature[,i]<-raw_dust_data_temperature[,i]*5000/1024-baseline[i-2]
}
# force GMT as the time zone to avoid openair issues with daylight saving switches
# The actual time zone is 'NZST'
raw_dust_data_temperature$date=as.POSIXct(paste(raw_dust_data_temperature$Date,raw_dust_data_temperature$Time),tz='GMT')
raw_dust_data_temperature$Time<-NULL
raw_dust_data_temperature$Date<-NULL
# Averages for 1 minutes
dust_data_temperature<-timeAverage(mydata = raw_dust_data_temperature,avg.time = '1 min',statistic = 'mean')
long_scatter_temperature=reshape(dust_data_temperature[,c(2,3,4,5,6,7,8,9,10,11)],direction = 'long',varying = names(dust_data_temperature[2:9]),v.name='Dust',timevar = 'Sensor')
long_scatter_temperature$Sensor=as.character(long_scatter_temperature$Sensor)
png("./sensor_temperature_scatter.png",height = 620,width = 1280)
ggplot(long_scatter_temperature, aes(x=Temperature, y=Dust, color=Sensor)) +
geom_point(shape=19,alpha=1/4)+
theme(text = element_text(size = 30))+
xlab('Temperature (C)')+
ylab('Sensor response (mV)')
## Warning in loop_apply(n, do.ply): Removed 7480 rows containing missing
## values (geom_point).
dev.off()
## png
## 2
summary(Temp.lm<-lm(data=long_scatter_temperature,Dust~Temperature))
##
## Call:
## lm(formula = Dust ~ Temperature, data = long_scatter_temperature)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.5220 -2.9819 -0.4121 2.7052 18.7857
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.239956 0.064005 50.62 <2e-16 ***
## Temperature 0.270529 0.003985 67.88 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.811 on 27374 degrees of freedom
## (7480 observations deleted due to missingness)
## Multiple R-squared: 0.1441, Adjusted R-squared: 0.144
## F-statistic: 4607 on 1 and 27374 DF, p-value: < 2.2e-16
###############################################################################
## ODIN's performance evaluation ####
odin_01 <- read.table("http://files.figshare.com/2099373/odin01_24July_14August_2014.tsv",
header=TRUE,
quote="")
ecan_data<-read.table("http://files.figshare.com/2099539/StAlbans24July_14August_2014.tsv",
header = TRUE,
sep="\t",
quote = "\"'",
dec=".")
ecan_data$date<-as.POSIXct(ecan_data$date,format = "%m/%d/%Y %H:%M:%S")
odin_01$date=as.POSIXct(paste(odin_01$Date,odin_01$Time),tz='GMT')
odin_01$Time<-NULL
odin_01$Date<-NULL
odin_01$Battery<-5*odin_01$Battery/1024
# Merging the data
all_merged_FULL<-merge(odin_01,ecan_data,by = 'date',all = TRUE)
all_merged.10min<-timeAverage(all_merged_FULL,avg.time = '10 min')
# Time sync
# Check the time difference, correct the data and re-merge.
lag_test=ccf(all_merged.10min$Temperature,
all_merged.10min$Temperature.1m,
na.action=na.pass,
lag.max=100,
type='correlation',
ylab='Correlation',
main='Temperature correlation as function of clock lag')

odin01_lag=lag_test$lag[which.max(lag_test$acf)]
odin_01$date=odin_01$date-odin01_lag*10*60
odin_01$ODIN = odin_01$Dust
odin_01$Dust<-NULL
all_merged_FULL<-merge(odin_01,ecan_data,by = 'date',all = TRUE)
all_merged.10min<-timeAverage(all_merged_FULL,avg.time = '10 min')
# Dust performance using ECan data for calibration
# Calibration expression:
# $ODIN_{calibrated}=A*ODIN_{raw}+B*Temperature_{ODIN}+C*RH_{ODIN}+D$
# Full dataset 1 hour
all_merged.1hr<-timeAverage(all_merged.10min,avg.time='1 hour')
# PM$_{2.5}$ fdms
summary(odin1.lm.1hr.pm2.5.fdms<-
lm(data=all_merged.1hr,PM2.5.FDMS~
ODIN+Temperature+Humidity))
##
## Call:
## lm(formula = PM2.5.FDMS ~ ODIN + Temperature + Humidity, data = all_merged.1hr)
##
## Residuals:
## Min 1Q Median 3Q Max
## -32.901 -7.623 -1.608 5.541 95.078
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -35.57353 7.55221 -4.710 3.21e-06 ***
## ODIN 0.37554 0.01526 24.605 < 2e-16 ***
## Temperature -4.45436 0.17393 -25.610 < 2e-16 ***
## Humidity -0.63004 0.07362 -8.558 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.79 on 502 degrees of freedom
## (22 observations deleted due to missingness)
## Multiple R-squared: 0.7002, Adjusted R-squared: 0.6984
## F-statistic: 390.8 on 3 and 502 DF, p-value: < 2.2e-16
format(coef(odin1.lm.1hr.pm2.5.fdms),digits = 1)
## (Intercept) ODIN Temperature Humidity
## "-35.6" " 0.4" " -4.5" " -0.6"
format((confint(odin1.lm.1hr.pm2.5.fdms)[,2]-confint(odin1.lm.1hr.pm2.5.fdms)[,1])/2,digits = 1)
## (Intercept) ODIN Temperature Humidity
## "14.84" " 0.03" " 0.34" " 0.14"
# PM$_{10}$ fdms
summary(odin1.lm.1hr.pm10.fdms<-
lm(data=all_merged.1hr,PM10.FDMS~
ODIN+Temperature+Humidity))
##
## Call:
## lm(formula = PM10.FDMS ~ ODIN + Temperature + Humidity, data = all_merged.1hr)
##
## Residuals:
## Min 1Q Median 3Q Max
## -35.859 -10.585 -2.442 7.733 97.719
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -43.12180 9.69449 -4.448 1.07e-05 ***
## ODIN 0.42655 0.01959 21.771 < 2e-16 ***
## Temperature -4.47796 0.22327 -20.056 < 2e-16 ***
## Humidity -0.64501 0.09451 -6.825 2.54e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.7 on 502 degrees of freedom
## (22 observations deleted due to missingness)
## Multiple R-squared: 0.6084, Adjusted R-squared: 0.6061
## F-statistic: 260 on 3 and 502 DF, p-value: < 2.2e-16
format(coef(odin1.lm.1hr.pm10.fdms),digits = 1)
## (Intercept) ODIN Temperature Humidity
## "-43.1" " 0.4" " -4.5" " -0.6"
format((confint(odin1.lm.1hr.pm10.fdms)[,2]-confint(odin1.lm.1hr.pm10.fdms)[,1])/2,digits = 1)
## (Intercept) ODIN Temperature Humidity
## "19.05" " 0.04" " 0.44" " 0.19"
# 24 hour dataset
merged.24hr <- timeAverage(all_merged.1hr,avg.time = '24 hour',statistic = 'mean')
# PM$_{2.5}$ fdms
summary(odin1.lm.1dy.pm2.5.fdms<-
lm(data=merged.24hr,PM2.5.FDMS~
ODIN+Temperature+Humidity))
##
## Call:
## lm(formula = PM2.5.FDMS ~ ODIN + Temperature + Humidity, data = merged.24hr)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.2703 -4.0617 -0.2754 4.1605 17.2774
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.16933 31.38948 -0.101 0.9207
## ODIN 0.31459 0.05786 5.437 3.64e-05 ***
## Temperature -4.60105 0.86697 -5.307 4.80e-05 ***
## Humidity -0.79303 0.36717 -2.160 0.0445 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.715 on 18 degrees of freedom
## Multiple R-squared: 0.7081, Adjusted R-squared: 0.6595
## F-statistic: 14.56 on 3 and 18 DF, p-value: 4.649e-05
format(coef(odin1.lm.1dy.pm2.5.fdms),digits = 1)
## (Intercept) ODIN Temperature Humidity
## "-3.2" " 0.3" "-4.6" "-0.8"
format((confint(odin1.lm.1dy.pm2.5.fdms)[,2]-confint(odin1.lm.1dy.pm2.5.fdms)[,1])/2,digits = 1)
## (Intercept) ODIN Temperature Humidity
## "65.9" " 0.1" " 1.8" " 0.8"
# PM$_{10}$ fdms
summary(odin1.lm.1dy.pm10.fdms<-
lm(data=merged.24hr,PM10.FDMS~
ODIN+Temperature+Humidity))
##
## Call:
## lm(formula = PM10.FDMS ~ ODIN + Temperature + Humidity, data = merged.24hr)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.2492 -5.5529 0.0465 4.7099 18.7966
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.58157 36.39163 0.071 0.944229
## ODIN 0.35067 0.06708 5.228 5.69e-05 ***
## Temperature -4.76316 1.00513 -4.739 0.000164 ***
## Humidity -0.92979 0.42568 -2.184 0.042420 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.1 on 18 degrees of freedom
## Multiple R-squared: 0.6679, Adjusted R-squared: 0.6126
## F-statistic: 12.07 on 3 and 18 DF, p-value: 0.0001448
format(coef(odin1.lm.1dy.pm10.fdms),digits = 1)
## (Intercept) ODIN Temperature Humidity
## " 2.6" " 0.4" "-4.8" "-0.9"
format((confint(odin1.lm.1dy.pm10.fdms)[,2]-confint(odin1.lm.1dy.pm10.fdms)[,1])/2,digits = 1)
## (Intercept) ODIN Temperature Humidity
## "76.5" " 0.1" " 2.1" " 0.9"
### Calculate the corrected data
all_merged.1hr$ODIN.2.5f<-predict(odin1.lm.1hr.pm2.5.fdms,
newdata = all_merged.1hr)
all_merged.1hr$ODIN.10f<-predict(odin1.lm.1hr.pm10.fdms,
newdata = all_merged.1hr)
merged.24hr$ODIN.2.5f<-predict(odin1.lm.1dy.pm2.5.fdms,
newdata = merged.24hr)
merged.24hr$ODIN.10f<-predict(odin1.lm.1dy.pm10.fdms,
newdata = merged.24hr)
png('./raw_odin_fdms.png',width = 2400,height = 1200)
tseries_pm2.5<-ggplot(data = all_merged.1hr, aes(x=date,y=PM2.5.FDMS))+
geom_line(colour = 'red',linetype = 2)+
ylab(bquote(PM[2.5]~'('*mu~'g'~m^-3~')'))+
theme(text=element_text(size=30))
tseries_ODIN<-ggplot(data = all_merged.1hr, aes(x=date,y=ODIN))+
geom_line(colour = 'black',linetype = 1)+
ylab('ODIN(mV)')+
theme(text=element_text(size=30))
tseries_pm10<-ggplot(data = all_merged.1hr, aes(x=date,y=PM10.FDMS))+
geom_line(colour = 'blue',linetype = 2)+
ylab(bquote(PM[10]~'('*mu~'g'~m^-3~')'))+
theme(text=element_text(size=30))
grid.arrange(tseries_pm2.5,tseries_ODIN,tseries_pm10,ncol = 1)
## Warning in loop_apply(n, do.ply): Removed 22 rows containing missing values
## (geom_path).
dev.off()
## png
## 2
png('./corrected_odin_fdms_PM2.5.png',width = 2400,height = 1200)
tseries_pm2.5_1hr<-ggplot(data = all_merged.1hr, aes(x=date))+
geom_line(aes(y=PM2.5.FDMS),colour = 'red',linetype = 2)+
geom_line(aes(y=ODIN.2.5f),colour = 'black',linetype = 1)+
ylab(bquote(PM[2.5]~'('*mu~'g'~m^-3~')'))+
theme(text=element_text(size=30))
tseries_pm2.5_1dy<-ggplot(data = merged.24hr, aes(x=date))+
geom_line(aes(y=PM2.5.FDMS),colour = 'red',linetype = 2)+
geom_line(aes(y=ODIN.2.5f),colour = 'black',linetype = 1)+
ylab(bquote(PM[2.5]~'('*mu~'g'~m^-3~')'))+
theme(text=element_text(size=30))
grid.arrange(tseries_pm2.5_1hr,tseries_pm2.5_1dy,ncol = 1)
## Warning in loop_apply(n, do.ply): Removed 22 rows containing missing values
## (geom_path).
dev.off()
## png
## 2
png('./corrected_odin_fdms_PM10.png',width = 2400,height = 1200)
tseries_pm10_1hr<-ggplot(data = all_merged.1hr, aes(x=date))+
geom_line(aes(y=PM10.FDMS),colour = 'red',linetype = 2)+
geom_line(aes(y=ODIN.10f),colour = 'black',linetype = 1)+
ylab(bquote(PM[10]~'('*mu~'g'~m^-3~')'))+
theme(text=element_text(size=30))
tseries_pm10_1dy<-ggplot(data = merged.24hr, aes(x=date))+
geom_line(aes(y=PM10.FDMS),colour = 'red',linetype = 2)+
geom_line(aes(y=ODIN.10f),colour = 'black',linetype = 1)+
ylab(bquote(PM[10]~'('*mu~'g'~m^-3~')'))+
theme(text=element_text(size=30))
grid.arrange(tseries_pm10_1hr,tseries_pm10_1dy,ncol = 1)
## Warning in loop_apply(n, do.ply): Removed 22 rows containing missing values
## (geom_path).
dev.off()
## png
## 2