This script parses and cleans the ODIN-SD data read from the devices' memory cards, then calculates summary statistics and plots draft maps of PM10 and PM2.5.

Prepare libraries

library(librarian) # To more flexibly manage packages
shelf(readr,
      openair,
      automap,
      raster,
      gstat,
      sp,
      rgdal,
      ggmap,
      ggplot2,
      scales)
## Warning in shelf(readr, openair, automap, raster, gstat, sp, rgdal, ggmap, : cran_repo = '@CRAN@' is not a valid URL. 
##                     Defaulting to cran_repo = 'https://cran.r-project.org'.
## Loading required package: sp
## rgdal: version: 1.3-6, (SVN revision 773)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 2.2.4, released 2018/03/19
##  Path to GDAL shared files: /usr/share/gdal
##  GDAL binary built with GEOS: TRUE 
##  Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
##  Path to PROJ.4 shared files: (autodetected)
##  Linking to sp version: 1.3-1
## Loading required package: ggplot2
## 
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
## 
##     col_factor

Set constants

# Root directory that holds one sub-folder per ODIN-SD memory card
data_path <- "~/data/ODIN_SD/Gisborne/WORKING/"
# Names of the entries under data_path whose name contains "00"
folders_list <- list.files(path = data_path, pattern = "00")
# Averaging period used for the time-averaged output
tavg <- "10 min"

Load data

Cycle through the folders to work with all the DATA.TXT files

# Read the DATA.TXT file of every folder in folders_list, rebuild a corrected
# timestamp for each record and collect the results.
#
# Uses (defined earlier in the script): data_path, folders_list, tavg.
# Produces:
#   all.data      - every record of every folder (date, PM1, PM2.5, PM10,
#                   Temperature, RH, ODINsn)
#   all.data.tavg - the same records averaged over `tavg` per folder, with
#                   ODINsn set to the first serial number found in the folder

# Fail early with a clear message instead of trying to read ".../NA/DATA.TXT"
if (length(folders_list) == 0) {
  stop("No data folders found in ", data_path, call. = FALSE)
}

# Per-folder results are stored in lists and bound once after the loop, so the
# accumulated data frames are not copied again on every iteration.
raw_list <- vector("list", length(folders_list))
tavg_list <- vector("list", length(folders_list))

for (i in seq_along(folders_list)) {
  folder <- folders_list[i]
  print(folder)
  odin.data <- readr::read_delim(paste0(data_path, folder, "/DATA.TXT"),
                                 delim = ";",
                                 skip = 1,
                                 col_names = c("framelength",
                                               "PM1",
                                               "PM2.5",
                                               "PM10",
                                               "PM1x",
                                               "PM2.5x",
                                               "PM10x",
                                               "GasSN",
                                               "Gasppm",
                                               "GasT",
                                               "Gas2mV",
                                               "Temperature",
                                               "RH",
                                               "ODINid",
                                               "ODINsn",
                                               "RTCdate",
                                               "RTCtime",
                                               "GSMdate",
                                               "GSMtime",
                                               "RTCdate2",
                                               "RTCtime2"))

  # Pad missing GSM date-time with RTC date-time for simplicity
  gsm_time_missing <- is.na(odin.data$GSMtime)
  gsm_date_missing <- is.na(odin.data$GSMdate)
  odin.data$GSMtime[gsm_time_missing] <- odin.data$RTCtime[gsm_time_missing]
  odin.data$GSMdate[gsm_date_missing] <- odin.data$RTCdate[gsm_date_missing]

  # Construct POSIX objects for RTC and GSM timestamps
  odin.data$RTCtimestamp <- as.POSIXct(paste(odin.data$RTCdate,
                                             odin.data$RTCtime),
                                       tz = "UTC")
  odin.data$GSMtimestamp <- as.POSIXct(paste(odin.data$GSMdate,
                                             odin.data$GSMtime),
                                       tz = "UTC")

  # Find the correction from RTC to GSM timestamps.
  # A zero difference carries no information (it is also what the padded
  # records produce), so those records are excluded from the mean.
  time_correction.all <- as.numeric(difftime(odin.data$GSMtimestamp,
                                             odin.data$RTCtimestamp,
                                             units = "secs"))
  time_correction.all[time_correction.all == 0] <- NA
  time_diff <- mean(time_correction.all, na.rm = TRUE)

  # With no usable GSM timestamp the mean is NaN, which would turn every
  # timestamp of this folder into NA. Keep the RTC timestamps instead.
  if (is.nan(time_diff)) {
    warning("No usable GSM timestamps in folder ", folder,
            "; keeping the uncorrected RTC timestamps.",
            call. = FALSE)
    time_diff <- 0
  }

  # Calculate the "real" timestamp for the records
  odin.data$date <- odin.data$RTCtimestamp + time_diff
  odin.data <- odin.data[, c("date",
                             "PM1",
                             "PM2.5",
                             "PM10",
                             "Temperature",
                             "RH",
                             "ODINsn")]

  # Time-average this folder's records and tag them with the serial number
  tmp1 <- timeAverage(odin.data, avg.time = tavg)
  tmp1$ODINsn <- odin.data$ODINsn[1]

  raw_list[[i]] <- odin.data
  tavg_list[[i]] <- tmp1
}

# Construct the ALLDATA frames from the per-folder pieces
all.data <- do.call(rbind, raw_list)
all.data.tavg <- do.call(rbind, tavg_list)

# Remove the per-folder working objects now that everything is combined
rm(odin.data, raw_list, tavg_list)
## [1] "0005"
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   GasSN = col_character(),
##   Temperature = col_double(),
##   RH = col_double(),
##   ODINsn = col_character(),
##   RTCdate = col_date(format = ""),
##   RTCtime = col_time(format = ""),
##   GSMdate = col_date(format = ""),
##   GSMtime = col_time(format = ""),
##   RTCdate2 = col_date(format = ""),
##   RTCtime2 = col_time(format = "")
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 16679 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual    file                                    expected   <int> <chr> <chr>      <chr>     <chr>                                   actual 1   527 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0005/… file 2   725 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0005/… row 3   726 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0005/… col 4   727 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0005/… expected 5   728 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0005/…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
## [1] "0008"
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   GasSN = col_character(),
##   Temperature = col_double(),
##   RH = col_double(),
##   ODINsn = col_character(),
##   RTCdate = col_date(format = ""),
##   RTCtime = col_time(format = ""),
##   GSMdate = col_date(format = ""),
##   GSMtime = col_time(format = ""),
##   RTCdate2 = col_date(format = ""),
##   RTCtime2 = col_time(format = "")
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 17040 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual    file                                    expected   <int> <chr> <chr>      <chr>     <chr>                                   actual 1   327 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0008/… file 2   328 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0008/… row 3   329 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0008/… col 4   330 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0008/… expected 5   331 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0008/…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
## [1] "0009"
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   GasSN = col_character(),
##   Temperature = col_double(),
##   RH = col_double(),
##   ODINsn = col_character(),
##   RTCdate = col_date(format = ""),
##   RTCtime = col_time(format = ""),
##   GSMdate = col_date(format = ""),
##   GSMtime = col_time(format = ""),
##   RTCdate2 = col_date(format = ""),
##   RTCtime2 = col_time(format = "")
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 17272 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual    file                                    expected   <int> <chr> <chr>      <chr>     <chr>                                   actual 1    20 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0009/… file 2   119 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0009/… row 3   120 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0009/… col 4   121 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0009/… expected 5   122 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0009/…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
## [1] "0010"
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   GasSN = col_character(),
##   Temperature = col_double(),
##   RH = col_double(),
##   ODINsn = col_character(),
##   RTCdate = col_date(format = ""),
##   RTCtime = col_time(format = ""),
##   GSMdate = col_date(format = ""),
##   GSMtime = col_time(format = ""),
##   RTCdate2 = col_date(format = ""),
##   RTCtime2 = col_time(format = "")
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 68195 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual    file                                    expected   <int> <chr> <chr>      <chr>     <chr>                                   actual 1   485 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0010/… file 2   486 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0010/… row 3   487 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0010/… col 4   488 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0010/… expected 5   489 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0010/…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
## [1] "0011"
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   GasSN = col_character(),
##   Temperature = col_double(),
##   RH = col_double(),
##   ODINsn = col_character(),
##   RTCdate = col_date(format = ""),
##   RTCtime = col_time(format = ""),
##   GSMdate = col_date(format = ""),
##   GSMtime = col_time(format = ""),
##   RTCdate2 = col_date(format = ""),
##   RTCtime2 = col_time(format = "")
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 17382 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual    file                                    expected   <int> <chr> <chr>      <chr>     <chr>                                   actual 1    18 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0011/… file 2    19 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0011/… row 3    20 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0011/… col 4    21 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0011/… expected 5    22 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0011/…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
## [1] "0014"
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   GasSN = col_character(),
##   Temperature = col_double(),
##   RH = col_double(),
##   ODINsn = col_character(),
##   RTCdate = col_date(format = ""),
##   RTCtime = col_time(format = ""),
##   GSMdate = col_date(format = ""),
##   GSMtime = col_time(format = ""),
##   RTCdate2 = col_date(format = ""),
##   RTCtime2 = col_time(format = "")
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 12591 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual    file                                    expected   <int> <chr> <chr>      <chr>     <chr>                                   actual 1  3941 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0014/… file 2  4329 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0014/… row 3  4618 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0014/… col 4  4792 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0014/… expected 5  4793 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0014/…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
## [1] "0016"
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   GasSN = col_character(),
##   Temperature = col_double(),
##   RH = col_double(),
##   ODINsn = col_character(),
##   RTCdate = col_date(format = ""),
##   RTCtime = col_time(format = ""),
##   GSMdate = col_date(format = ""),
##   GSMtime = col_time(format = ""),
##   RTCdate2 = col_date(format = ""),
##   RTCtime2 = col_time(format = "")
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 16217 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual    file                                    expected   <int> <chr> <chr>      <chr>     <chr>                                   actual 1    28 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0016/… file 2    29 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0016/… row 3    30 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0016/… col 4    31 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0016/… expected 5    32 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0016/…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
## [1] "0017"
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   GasSN = col_character(),
##   Temperature = col_double(),
##   RH = col_double(),
##   ODINsn = col_character(),
##   RTCdate = col_date(format = ""),
##   RTCtime = col_time(format = ""),
##   GSMdate = col_date(format = ""),
##   GSMtime = col_time(format = ""),
##   RTCdate2 = col_date(format = ""),
##   RTCtime2 = col_time(format = "")
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 15361 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual    file                                    expected   <int> <chr> <chr>      <chr>     <chr>                                   actual 1   686 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0017/… file 2   786 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0017/… row 3  2026 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0017/… col 4  2027 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0017/… expected 5  2028 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0017/…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
## [1] "0018"
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   GasSN = col_character(),
##   Temperature = col_double(),
##   RH = col_double(),
##   ODINsn = col_character(),
##   RTCdate = col_date(format = ""),
##   RTCtime = col_time(format = ""),
##   GSMdate = col_date(format = ""),
##   GSMtime = col_time(format = ""),
##   RTCdate2 = col_date(format = ""),
##   RTCtime2 = col_time(format = "")
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 17389 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual    file                                    expected   <int> <chr> <chr>      <chr>     <chr>                                   actual 1     5 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0018/… file 2     6 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0018/… row 3     7 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0018/… col 4     8 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0018/… expected 5     9 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0018/…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
## [1] "0021"
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   GasSN = col_character(),
##   Temperature = col_double(),
##   RH = col_double(),
##   ODINsn = col_character(),
##   RTCdate = col_date(format = ""),
##   RTCtime = col_time(format = ""),
##   GSMdate = col_date(format = ""),
##   GSMtime = col_time(format = ""),
##   RTCdate2 = col_date(format = ""),
##   RTCtime2 = col_time(format = "")
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 17241 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual    file                                    expected   <int> <chr> <chr>      <chr>     <chr>                                   actual 1   167 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0021/… file 2   168 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0021/… row 3   169 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0021/… col 4   170 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0021/… expected 5   171 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0021/…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
## [1] "0023"
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   GasSN = col_character(),
##   Temperature = col_double(),
##   RH = col_double(),
##   ODINsn = col_character(),
##   RTCdate = col_date(format = ""),
##   RTCtime = col_time(format = ""),
##   GSMdate = col_date(format = ""),
##   GSMtime = col_time(format = ""),
##   RTCdate2 = col_date(format = ""),
##   RTCtime2 = col_time(format = "")
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 42993 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual    file                                    expected   <int> <chr> <chr>      <chr>     <chr>                                   actual 1   134 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0023/… file 2   236 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0023/… row 3   237 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0023/… col 4   238 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0023/… expected 5   239 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0023/…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
## [1] "0025"
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   GasSN = col_character(),
##   Temperature = col_double(),
##   RH = col_double(),
##   ODINsn = col_character(),
##   RTCdate = col_date(format = ""),
##   RTCtime = col_time(format = ""),
##   GSMdate = col_date(format = ""),
##   GSMtime = col_time(format = ""),
##   RTCdate2 = col_date(format = ""),
##   RTCtime2 = col_time(format = "")
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 17351 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual    file                                    expected   <int> <chr> <chr>      <chr>     <chr>                                   actual 1    43 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0025/… file 2    44 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0025/… row 3    45 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0025/… col 4    46 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0025/… expected 5    47 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0025/…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
## [1] "0026"
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   GasSN = col_character(),
##   Temperature = col_double(),
##   RH = col_double(),
##   ODINsn = col_character(),
##   RTCdate = col_date(format = ""),
##   RTCtime = col_time(format = ""),
##   GSMdate = col_date(format = ""),
##   GSMtime = col_time(format = ""),
##   RTCdate2 = col_date(format = ""),
##   RTCtime2 = col_time(format = "")
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 17398 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual    file                                    expected   <int> <chr> <chr>      <chr>     <chr>                                   actual 1     5 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0026/… file 2     6 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0026/… row 3     7 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0026/… col 4     8 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0026/… expected 5     9 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0026/…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
## [1] "0027"
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   GasSN = col_character(),
##   Temperature = col_double(),
##   RH = col_double(),
##   ODINsn = col_character(),
##   RTCdate = col_date(format = ""),
##   RTCtime = col_time(format = ""),
##   GSMdate = col_date(format = ""),
##   GSMtime = col_time(format = ""),
##   RTCdate2 = col_date(format = ""),
##   RTCtime2 = col_time(format = "")
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 16666 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual    file                                    expected   <int> <chr> <chr>      <chr>     <chr>                                   actual 1   230 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0027/… file 2   339 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0027/… row 3   340 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0027/… col 4   341 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0027/… expected 5   342 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0027/…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
## [1] "0028"
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   GasSN = col_character(),
##   Temperature = col_double(),
##   RH = col_double(),
##   ODINsn = col_character(),
##   RTCdate = col_date(format = ""),
##   RTCtime = col_time(format = ""),
##   GSMdate = col_date(format = ""),
##   GSMtime = col_time(format = ""),
##   RTCdate2 = col_date(format = ""),
##   RTCtime2 = col_time(format = "")
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 17391 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual    file                                    expected   <int> <chr> <chr>      <chr>     <chr>                                   actual 1    10 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0028/… file 2    11 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0028/… row 3    12 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0028/… col 4    13 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0028/… expected 5    14 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0028/…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
## [1] "0029"
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   GasSN = col_character(),
##   Temperature = col_double(),
##   RH = col_double(),
##   ODINsn = col_character(),
##   RTCdate = col_date(format = ""),
##   RTCtime = col_time(format = ""),
##   GSMdate = col_date(format = ""),
##   GSMtime = col_time(format = ""),
##   RTCdate2 = col_date(format = ""),
##   RTCtime2 = col_time(format = "")
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 16866 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual    file                                    expected   <int> <chr> <chr>      <chr>     <chr>                                   actual 1   531 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0029/… file 2   532 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0029/… row 3   533 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0029/… col 4   534 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0029/… expected 5   535 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0029/…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
## [1] "0031"
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   GasSN = col_character(),
##   Temperature = col_double(),
##   RH = col_double(),
##   ODINid = col_character(),
##   ODINsn = col_character(),
##   RTCdate = col_date(format = ""),
##   RTCtime = col_time(format = ""),
##   GSMdate = col_date(format = ""),
##   GSMtime = col_time(format = ""),
##   RTCdate2 = col_date(format = ""),
##   RTCtime2 = col_time(format = "")
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 4465 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual    file                                    expected   <int> <chr> <chr>      <chr>     <chr>                                   actual 1   562 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0031/… file 2  1071 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0031/… row 3  1379 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0031/… col 4  2197 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0031/… expected 5  2282 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0031/…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.

Get device locations

# Load the tab-delimited table of ODIN units: one row per unit with its
# serial number (serialn) and position (lat, lon)
odin_locations <- readr::read_delim(
  file = "odin_locations.txt",
  delim = "\t",
  escape_double = FALSE,
  trim_ws = TRUE
)
## Parsed with column specification:
## cols(
##   serialn = col_character(),
##   lat = col_double(),
##   lon = col_double()
## )
# Attach each unit's coordinates to the raw and the time-averaged data.
# Every record starts at lon = lat = 0 and keeps that placeholder when its
# serial number does not appear in odin_locations.
nsites <- length(odin_locations$serialn)
all.data$lon <- 0
all.data$lat <- 0
all.data.tavg$lon <- 0
all.data.tavg$lat <- 0
# Records are matched to sites on a slice of the serial number: characters
# 3-6 of ODINsn (e.g. "SD0025") against characters 7-11 of serialn
# (e.g. "ODINSD0025"). The data-side keys are the same for every site, so
# they are computed once outside the loop.
data_key <- substr(all.data$ODINsn, 3, 6)
data_key_tavg <- substr(all.data.tavg$ODINsn, 3, 6)
# seq_len() gives an empty sequence when there are no sites; 1:nsites would
# iterate over c(1, 0) instead
for (j in seq_len(nsites)) {
  print(odin_locations$serialn[j])
  site_key <- substr(odin_locations$serialn[j], 7, 11)
  loc_id <- (data_key == site_key)
  all.data$lon[loc_id] <- odin_locations$lon[j]
  all.data$lat[loc_id] <- odin_locations$lat[j]
  loc_id.tavg <- (data_key_tavg == site_key)
  all.data.tavg$lon[loc_id.tavg] <- odin_locations$lon[j]
  all.data.tavg$lat[loc_id.tavg] <- odin_locations$lat[j]
}
## [1] "ODINSD0025"
## [1] "ODINSD0023"
## [1] "ODINSD0026"
## [1] "ODINSD0016"
## [1] "ODINSD0011"
## [1] "ODINSD0031"
## [1] "ODINSD0029"
## [1] "ODINSD0028"
## [1] "ODINSD0009"
## [1] "ODINSD0018"
## [1] "ODINSD0014"
## [1] "ODINSD0017"
## [1] "ODINSD0027"
## [1] "ODINSD0008"
## [1] "ODINSD0010"
## [1] "ODINSD0021"
# Keep only the records inside the campaign window (both ends inclusive)
start_date <- as.POSIXct("2018-06-22 12:00:00", tz = "UTC")
end_date <- as.POSIXct("2018-08-16 12:00:00", tz = "UTC")
# Length of the campaign in minutes
nminutes <- difftime(end_date, start_date, units = "mins")
# which() drops rows whose date comparison is NA, as subset() does
in_campaign <- which(all.data$date >= start_date & all.data$date <= end_date)
all.data <- all.data[in_campaign, ]
in_campaign_tavg <- which(
  all.data.tavg$date >= start_date & all.data.tavg$date <= end_date
)
all.data.tavg <- all.data.tavg[in_campaign_tavg, ]

Summary statistics

Note that the statistics are calculated on 10-minute averages and that the campaign ran from June 22nd 12:00 UTC until August 16th 12:00 UTC (55 days in total).

The units for the parameters are:

* PM2.5 [\(\mu\)g/m\(^3\)]
* PM10 [\(\mu\)g/m\(^3\)]
* Temperature [°C]
* RH [%]

# Per-unit summary tables of the time-averaged data. All of them share the
# same grouping formula: the four measured parameters grouped by unit.
stat_formula <- cbind(Temperature, RH, PM2.5, PM10) ~ ODINsn
summary_mean <- aggregate(stat_formula, data = all.data.tavg, FUN = mean)
summary_max <- aggregate(stat_formula, data = all.data.tavg, FUN = max)
summary_min <- aggregate(stat_formula, data = all.data.tavg, FUN = min)
summary_sd <- aggregate(stat_formula, data = all.data.tavg, FUN = sd)
summary_N <- aggregate(stat_formula, data = all.data.tavg, FUN = length)
# Data availability: number of records per unit as a percentage of the
# number of 10-minute intervals in the campaign
expected_records <- as.numeric(nminutes) / 10
summary_pct <- summary_N
summary_pct[, 2:5] <- format(
  100 * (summary_N[, 2:5] / expected_records),
  digits = 2
)

Average concentrations

# Per-unit means of each parameter
print(format(x = summary_mean, digits = 1))
##    ODINsn Temperature RH PM2.5 PM10
## 1  SD0005          12 73  10.8 10.8
## 2  SD0008          13 69  12.8 18.5
## 3  SD0009          13 69  13.6 17.3
## 4  SD0010          13 67  25.5 34.1
## 5  SD0011          12 71  17.3 20.2
## 6  SD0014          12 71  20.4 28.7
## 7  SD0016          13 67  16.6 19.6
## 8  SD0017          12 70  20.9 24.4
## 9  SD0018          13 69  22.8 29.6
## 10 SD0021          13 70  14.1 17.0
## 11 SD0023          13 68   8.9 12.0
## 12 SD0025          12 71   4.9  6.7
## 13 SD0026          14 66  14.9 19.6
## 14 SD0027          13 70  11.5 16.3
## 15 SD0029          13 70  17.3 22.4
## 16 SD0031          12 72  12.5 15.5
## 17 SN0028          12 71   0.4  0.5

Maximum concentrations

# Per-unit maxima of each parameter
print(format(x = summary_max, digits = 1))
##    ODINsn Temperature RH PM2.5 PM10
## 1  SD0005          27 98   100  100
## 2  SD0008          29 98   203  299
## 3  SD0009          31 96   172  252
## 4  SD0010          31 93   353  536
## 5  SD0011          27 96   260  328
## 6  SD0014          29 97   322  528
## 7  SD0016          34 95   262  366
## 8  SD0017          28 96   338  454
## 9  SD0018          27 95   232  536
## 10 SD0021          28 98   314 1284
## 11 SD0023          33 96   164  213
## 12 SD0025          31 95   154  180
## 13 SD0026          32 95   242  328
## 14 SD0027          31 98   225  337
## 15 SD0029          30 96   280  361
## 16 SD0031          27 96   366  507
## 17 SN0028          26 98    40   46

Minimum concentrations

# Per-unit minima of each parameter
print(format(x = summary_min, digits = 1))
##    ODINsn Temperature RH PM2.5 PM10
## 1  SD0005         0.3 27     4    4
## 2  SD0008         2.0 21     0    0
## 3  SD0009         0.2 21     0    0
## 4  SD0010         1.8 20     0    0
## 5  SD0011         1.4 24     0    0
## 6  SD0014         0.3 25     0    0
## 7  SD0016         2.2 16     0    0
## 8  SD0017         1.6 24     0    0
## 9  SD0018         1.4 22     0    0
## 10 SD0021         0.6 24     0    0
## 11 SD0023         1.4 18     0    0
## 12 SD0025         1.8 24     0    0
## 13 SD0026         3.5 18     0    0
## 14 SD0027         2.3 19     0    0
## 15 SD0029         1.3 22     0    0
## 16 SD0031         1.8 23     0    0
## 17 SN0028         0.5 27     0    0

Standard deviation

# Per-unit standard deviations of each parameter (summary_sd, not the means)
print(format(summary_sd, digits = 1))
##    ODINsn Temperature RH PM2.5 PM10
## 1  SD0005          12 73  10.8 10.8
## 2  SD0008          13 69  12.8 18.5
## 3  SD0009          13 69  13.6 17.3
## 4  SD0010          13 67  25.5 34.1
## 5  SD0011          12 71  17.3 20.2
## 6  SD0014          12 71  20.4 28.7
## 7  SD0016          13 67  16.6 19.6
## 8  SD0017          12 70  20.9 24.4
## 9  SD0018          13 69  22.8 29.6
## 10 SD0021          13 70  14.1 17.0
## 11 SD0023          13 68   8.9 12.0
## 12 SD0025          12 71   4.9  6.7
## 13 SD0026          14 66  14.9 19.6
## 14 SD0027          13 70  11.5 16.3
## 15 SD0029          13 70  17.3 22.4
## 16 SD0031          12 72  12.5 15.5
## 17 SN0028          12 71   0.4  0.5

Data availability [%]

# Per-unit data availability, in percent
print(format(x = summary_pct, digits = 1))
##    ODINsn Temperature  RH PM2.5 PM10
## 1  SD0005         100 100   100  100
## 2  SD0008         100 100   100  100
## 3  SD0009         100 100   100  100
## 4  SD0010          95  95    95   95
## 5  SD0011         100 100   100  100
## 6  SD0014         100 100   100  100
## 7  SD0016          95  95    95   95
## 8  SD0017         100 100   100  100
## 9  SD0018         100 100   100  100
## 10 SD0021         100 100   100  100
## 11 SD0023         100 100   100  100
## 12 SD0025         100 100   100  100
## 13 SD0026         100 100   100  100
## 14 SD0027         100 100   100  100
## 15 SD0029         100 100   100  100
## 16 SD0031         100 100   100  100
## 17 SN0028         100 100   100  100

Malfunctioning ODIN

The previous summaries show that ODIN-0005 malfunctioned, so it is removed from the analysis. In addition, the QA process identified ODIN-0028 as malfunctioning, so that unit is removed from the analysis as well.

# Drop the malfunctioning units from both data sets. Their rows are blanked
# and then removed with na.exclude(), which also removes any other row that
# contains a missing value.
faulty_units <- c("SD0005", "SN0028")
# %in% never returns NA, so a record with a missing ODINsn cannot put an NA
# into the row index used for the assignment below
remove_idx <- all.data$ODINsn %in% faulty_units
all.data[remove_idx, ] <- NA
all.data <- na.exclude(all.data)
remove_idx <- all.data.tavg$ODINsn %in% faulty_units
all.data.tavg[remove_idx, ] <- NA
all.data.tavg <- na.exclude(all.data.tavg)

Time series

PM2.5

# PM2.5 against time, one coloured line per ODIN unit
ggplot(all.data.tavg, aes(x = date, y = PM2.5, colour = ODINsn)) +
  geom_line()

PM10

# PM10 against time, one coloured line per ODIN unit
ggplot(all.data.tavg, aes(x = date, y = PM10, colour = ODINsn)) +
  geom_line()

Temperature

# Temperature against time, one coloured line per ODIN unit
ggplot(all.data.tavg, aes(x = date, y = Temperature, colour = ODINsn)) +
  geom_line()

Relative Humidity

# Relative humidity against time, one coloured line per ODIN unit
ggplot(all.data.tavg, aes(x = date, y = RH, colour = ODINsn)) +
  geom_line()

Average Maps

# Coordinate reference systems used for mapping, given as EPSG codes
proj4string_NZTM <- CRS("+init=epsg:2193")
proj4string_latlon <- CRS("+init=epsg:4326")
# Per-unit means of the measurements together with each unit's position
map_formula <- cbind(Temperature, RH, PM2.5, PM10, lon, lat) ~ ODINsn
summary_mean_map <- aggregate(map_formula, data = all.data.tavg, FUN = mean)
# Turn the table into a spatial object: lon/lat become the coordinates and
# are tagged with the lat/lon reference system
coordinates(summary_mean_map) <- ~ lon + lat
proj4string(summary_mean_map) <- proj4string_latlon

# Get the basemap, centred on the mean position of the units
centre_lat <- mean(summary_mean_map$lat)
centre_lon <- mean(summary_mean_map$lon)
# The Google Maps API key is a credential and must not be stored in the
# script: it is read from the GOOGLE_MAPS_API_KEY environment variable.
# NOTE(review): the key that was previously committed here should be revoked.
google_key <- Sys.getenv("GOOGLE_MAPS_API_KEY")
if (!nzchar(google_key)) {
  stop(
    "Set the GOOGLE_MAPS_API_KEY environment variable to download the basemap",
    call. = FALSE
  )
}
ca <- get_googlemap(
  center = c(lon = centre_lon, lat = centre_lat),
  zoom = 13,
  scale = 2,
  color = "bw",
  key = google_key)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=-38.661357,178.019815&zoom=13&size=640x640&scale=2&maptype=terrain&sensor=false&key=AIzaSyACi3pNvPQTxZWx5u0nTtke598dPqdgySg

PM2.5

# Mean PM2.5 per unit drawn over the basemap; the colour scale runs from 0 to
# the largest per-unit mean and out-of-range values are squished to the limits
site_means <- as.data.frame(summary_mean_map)
pm25_limits <- c(0, max(summary_mean_map$PM2.5))
ggmap(ca) +
  geom_point(
    data = site_means,
    aes(x = lon, y = lat, colour = PM2.5),
    size = 5
  ) +
  scale_colour_continuous(
    low = "white", high = "red",
    limits = pm25_limits,
    name = "PM2.5", oob = squish
  )

PM10

# Mean PM10 per unit drawn over the basemap; the colour scale runs from 0 to
# the largest per-unit mean and out-of-range values are squished to the limits
site_means <- as.data.frame(summary_mean_map)
pm10_limits <- c(0, max(summary_mean_map$PM10))
ggmap(ca) +
  geom_point(
    data = site_means,
    aes(x = lon, y = lat, colour = PM10),
    size = 5
  ) +
  scale_colour_continuous(
    low = "white", high = "red",
    limits = pm10_limits,
    name = "PM10", oob = squish
  )

PMcoarse

# Mean coarse fraction (PM10 minus PM2.5) per unit drawn over the basemap; the
# colour scale runs from 0 to the largest per-unit difference
site_means <- as.data.frame(summary_mean_map)
pmcoarse_limits <- c(0, max(summary_mean_map$PM10 - summary_mean_map$PM2.5))
ggmap(ca) +
  geom_point(
    data = site_means,
    aes(x = lon, y = lat, colour = PM10 - PM2.5),
    size = 5
  ) +
  scale_colour_continuous(
    low = "white", high = "red",
    limits = pmcoarse_limits,
    name = "PMcoarse", oob = squish
  )

#’ ## Kriging

proj4string_NZTM <- CRS('+init=epsg:2193')
proj4string_latlon <- CRS('+init=epsg:4326')
summary_mean_map <- spTransform(summary_mean_map,proj4string_NZTM)
cellsize <- 100
min_x <- summary_mean_map@bbox[1,1] - cellsize # minimum x coordinate
min_y <- summary_mean_map@bbox[2,1] - cellsize # minimum y coordinate
max_x <- summary_mean_map@bbox[1,2] + cellsize # maximum x coordinate
max_y <- summary_mean_map@bbox[2,2] + cellsize # maximum y coordinate

x_length <- max_x - min_x # easting amplitude
y_length <- max_y - min_y # northing amplitude
ncol <- round(x_length/cellsize,0) # number of columns in grid
nrow <- round(y_length/cellsize,0) # number of rows in grid

grid <- GridTopology(cellcentre.offset=c(min_x,min_y),cellsize=c(cellsize,cellsize),cells.dim=c(ncol,nrow))

# Convert GridTopology object to SpatialPixelsDataFrame object. #####
grid <- SpatialPixelsDataFrame(grid, data=data.frame(id=1:prod(ncol,nrow)), proj4string=CRS('+init=epsg:2193'))
surf.krig <- autoKrige(PM2.5 ~ 1,data=summary_mean_map,new_data = grid, input_data=summary_mean_map)

proj4string(surf.krig$krige_output) <- CRS('+init=epsg:2193')

plot(rasterFromXYZ(cbind(surf.krig$krige_output@coords,surf.krig$krige_output@data$var1.pred)))

# Store the raw and the time-averaged data frames as RData files
save(all.data, file = "alldata.RData")
save(all.data.tavg, file = "alldataTAVG.RData")
# Export the timestamp, the four measured parameters and the unit serial
# number of the time-averaged data as CSV
export_cols <- c("date", "PM2.5", "PM10", "Temperature", "RH", "ODINsn")
data.output <- all.data.tavg[, export_cols]
write_excel_csv(data.output, "./gisborne_10min.csv")