This script parses and cleans the ODIN-SD data read from the units' memory cards, then calculates summary statistics and plots draft maps of PM10 and PM2.5.

Prepare libraries

library(librarian) # To more flexibly manage packages
shelf(readr,
      openair,
      automap,
      raster,
      gstat,
      sp,
      rgdal,
      ggmap,
      ggplot2,
      scales)
## Warning in shelf(readr, openair, automap, raster, gstat, sp, rgdal, ggmap, : cran_repo = '@CRAN@' is not a valid URL. 
##                     Defaulting to cran_repo = 'https://cran.r-project.org'.
## Loading required package: sp
## rgdal: version: 1.3-6, (SVN revision 773)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 2.2.4, released 2018/03/19
##  Path to GDAL shared files: /usr/share/gdal
##  GDAL binary built with GEOS: TRUE 
##  Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
##  Path to PROJ.4 shared files: (autodetected)
##  Linking to sp version: 1.3-1
## Loading required package: ggplot2
## 
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
## 
##     col_factor

Set constants

# Root folder that holds one sub-folder per memory card
data_path <- "~/data/ODIN_SD/Gisborne/WORKING/"
# Entries under data_path whose name contains "00"; each one is expected to
# hold a DATA.TXT file
folders_list <- list.files(path = data_path, pattern = "00")
# Averaging period used for the time-averaged output
tavg <- "1 hour"

Load data

Cycle through the folders to work with all the DATA.TXT files

# Read the DATA.TXT file of every folder, correct its timestamps and collect
# both the raw records (all.data) and the time-averaged records
# (all.data.tavg) of all ODIN units.

# Fail early with a clear message instead of trying to read a non-existent file
if (length(folders_list) == 0) {
  stop("No ODIN folders found in ", data_path, call. = FALSE)
}

# Column layout of the semicolon-delimited DATA.TXT records
odin_columns <- c("framelength",
                  "PM1",
                  "PM2.5",
                  "PM10",
                  "PM1x",
                  "PM2.5x",
                  "PM10x",
                  "GasSN",
                  "Gasppm",
                  "GasT",
                  "Gas2mV",
                  "Temperature",
                  "RH",
                  "ODINid",
                  "ODINsn",
                  "RTCdate",
                  "RTCtime",
                  "GSMdate",
                  "GSMtime",
                  "RTCdate2",
                  "RTCtime2")

# Columns kept for the analysis
keep_columns <- c("date",
                  "PM1",
                  "PM2.5",
                  "PM10",
                  "Temperature",
                  "RH",
                  "ODINsn")

# Per-folder results are stored in preallocated lists and bound once at the
# end, which avoids re-copying the accumulated data on every iteration
raw_list <- vector("list", length(folders_list))
tavg_list <- vector("list", length(folders_list))

for (i in seq_along(folders_list)) {
  folder <- folders_list[i]
  print(folder)
  odin.data <- readr::read_delim(paste0(data_path, folder, "/DATA.TXT"),
                                 delim = ";",
                                 skip = 1,
                                 col_names = odin_columns)

  # Pad missing GSM date-time with RTC date-time for simplicity
  missing_gsm_time <- is.na(odin.data$GSMtime)
  odin.data$GSMtime[missing_gsm_time] <- odin.data$RTCtime[missing_gsm_time]
  missing_gsm_date <- is.na(odin.data$GSMdate)
  odin.data$GSMdate[missing_gsm_date] <- odin.data$RTCdate[missing_gsm_date]

  # Construct POSIX objects for RTC and GSM timestamps
  odin.data$RTCtimestamp <- as.POSIXct(paste(odin.data$RTCdate,
                                             odin.data$RTCtime),
                                       tz = "UTC")
  odin.data$GSMtimestamp <- as.POSIXct(paste(odin.data$GSMdate,
                                             odin.data$GSMtime),
                                       tz = "UTC")

  # Find the correction from RTC to GSM timestamps. Records with a zero
  # difference (including the padded ones) carry no information about the
  # clock offset, so they are excluded from the mean.
  time_correction.all <- as.numeric(difftime(odin.data$GSMtimestamp,
                                             odin.data$RTCtimestamp,
                                             units = "secs"))
  time_correction.all[which(time_correction.all == 0)] <- NA
  time_diff <- mean(time_correction.all, na.rm = TRUE)
  if (!is.finite(time_diff)) {
    # No usable GSM timestamp on this card: mean() returned NaN, which would
    # turn every record's date into NA. Fall back to the uncorrected RTC time.
    message("No GSM time correction available for ", folder,
            "; using RTC timestamps as they are")
    time_diff <- 0
  }

  # Calculate the "real" timestamp for the records
  odin.data$date <- odin.data$RTCtimestamp + time_diff
  odin.data <- odin.data[, keep_columns]

  # Store the raw records and their time average, tagging the averaged
  # records with the serial number of this unit
  raw_list[[i]] <- odin.data
  averaged <- timeAverage(odin.data, avg.time = tavg)
  averaged$ODINsn <- odin.data$ODINsn[1]
  tavg_list[[i]] <- averaged
}

# Construct the ALLDATA frames from the per-folder pieces
all.data <- do.call(rbind, raw_list)
all.data.tavg <- do.call(rbind, tavg_list)
## [1] "0005"
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   GasSN = col_character(),
##   Temperature = col_double(),
##   RH = col_double(),
##   ODINsn = col_character(),
##   RTCdate = col_date(format = ""),
##   RTCtime = col_time(format = ""),
##   GSMdate = col_date(format = ""),
##   GSMtime = col_time(format = ""),
##   RTCdate2 = col_date(format = ""),
##   RTCtime2 = col_time(format = "")
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 16679 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual    file                                    expected   <int> <chr> <chr>      <chr>     <chr>                                   actual 1   527 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0005/… file 2   725 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0005/… row 3   726 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0005/… col 4   727 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0005/… expected 5   728 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0005/…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
## [1] "0008"
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   GasSN = col_character(),
##   Temperature = col_double(),
##   RH = col_double(),
##   ODINsn = col_character(),
##   RTCdate = col_date(format = ""),
##   RTCtime = col_time(format = ""),
##   GSMdate = col_date(format = ""),
##   GSMtime = col_time(format = ""),
##   RTCdate2 = col_date(format = ""),
##   RTCtime2 = col_time(format = "")
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 17040 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual    file                                    expected   <int> <chr> <chr>      <chr>     <chr>                                   actual 1   327 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0008/… file 2   328 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0008/… row 3   329 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0008/… col 4   330 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0008/… expected 5   331 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0008/…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
## [1] "0009"
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   GasSN = col_character(),
##   Temperature = col_double(),
##   RH = col_double(),
##   ODINsn = col_character(),
##   RTCdate = col_date(format = ""),
##   RTCtime = col_time(format = ""),
##   GSMdate = col_date(format = ""),
##   GSMtime = col_time(format = ""),
##   RTCdate2 = col_date(format = ""),
##   RTCtime2 = col_time(format = "")
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 17272 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual    file                                    expected   <int> <chr> <chr>      <chr>     <chr>                                   actual 1    20 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0009/… file 2   119 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0009/… row 3   120 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0009/… col 4   121 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0009/… expected 5   122 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0009/…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
## [1] "0010"
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   GasSN = col_character(),
##   Temperature = col_double(),
##   RH = col_double(),
##   ODINsn = col_character(),
##   RTCdate = col_date(format = ""),
##   RTCtime = col_time(format = ""),
##   GSMdate = col_date(format = ""),
##   GSMtime = col_time(format = ""),
##   RTCdate2 = col_date(format = ""),
##   RTCtime2 = col_time(format = "")
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 68195 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual    file                                    expected   <int> <chr> <chr>      <chr>     <chr>                                   actual 1   485 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0010/… file 2   486 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0010/… row 3   487 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0010/… col 4   488 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0010/… expected 5   489 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0010/…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
## [1] "0011"
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   GasSN = col_character(),
##   Temperature = col_double(),
##   RH = col_double(),
##   ODINsn = col_character(),
##   RTCdate = col_date(format = ""),
##   RTCtime = col_time(format = ""),
##   GSMdate = col_date(format = ""),
##   GSMtime = col_time(format = ""),
##   RTCdate2 = col_date(format = ""),
##   RTCtime2 = col_time(format = "")
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 17382 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual    file                                    expected   <int> <chr> <chr>      <chr>     <chr>                                   actual 1    18 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0011/… file 2    19 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0011/… row 3    20 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0011/… col 4    21 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0011/… expected 5    22 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0011/…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
## [1] "0014"
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   GasSN = col_character(),
##   Temperature = col_double(),
##   RH = col_double(),
##   ODINsn = col_character(),
##   RTCdate = col_date(format = ""),
##   RTCtime = col_time(format = ""),
##   GSMdate = col_date(format = ""),
##   GSMtime = col_time(format = ""),
##   RTCdate2 = col_date(format = ""),
##   RTCtime2 = col_time(format = "")
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 12591 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual    file                                    expected   <int> <chr> <chr>      <chr>     <chr>                                   actual 1  3941 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0014/… file 2  4329 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0014/… row 3  4618 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0014/… col 4  4792 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0014/… expected 5  4793 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0014/…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
## [1] "0016"
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   GasSN = col_character(),
##   Temperature = col_double(),
##   RH = col_double(),
##   ODINsn = col_character(),
##   RTCdate = col_date(format = ""),
##   RTCtime = col_time(format = ""),
##   GSMdate = col_date(format = ""),
##   GSMtime = col_time(format = ""),
##   RTCdate2 = col_date(format = ""),
##   RTCtime2 = col_time(format = "")
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 16217 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual    file                                    expected   <int> <chr> <chr>      <chr>     <chr>                                   actual 1    28 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0016/… file 2    29 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0016/… row 3    30 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0016/… col 4    31 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0016/… expected 5    32 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0016/…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
## [1] "0017"
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   GasSN = col_character(),
##   Temperature = col_double(),
##   RH = col_double(),
##   ODINsn = col_character(),
##   RTCdate = col_date(format = ""),
##   RTCtime = col_time(format = ""),
##   GSMdate = col_date(format = ""),
##   GSMtime = col_time(format = ""),
##   RTCdate2 = col_date(format = ""),
##   RTCtime2 = col_time(format = "")
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 15361 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual    file                                    expected   <int> <chr> <chr>      <chr>     <chr>                                   actual 1   686 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0017/… file 2   786 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0017/… row 3  2026 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0017/… col 4  2027 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0017/… expected 5  2028 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0017/…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
## [1] "0018"
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   GasSN = col_character(),
##   Temperature = col_double(),
##   RH = col_double(),
##   ODINsn = col_character(),
##   RTCdate = col_date(format = ""),
##   RTCtime = col_time(format = ""),
##   GSMdate = col_date(format = ""),
##   GSMtime = col_time(format = ""),
##   RTCdate2 = col_date(format = ""),
##   RTCtime2 = col_time(format = "")
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 17389 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual    file                                    expected   <int> <chr> <chr>      <chr>     <chr>                                   actual 1     5 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0018/… file 2     6 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0018/… row 3     7 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0018/… col 4     8 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0018/… expected 5     9 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0018/…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
## [1] "0021"
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   GasSN = col_character(),
##   Temperature = col_double(),
##   RH = col_double(),
##   ODINsn = col_character(),
##   RTCdate = col_date(format = ""),
##   RTCtime = col_time(format = ""),
##   GSMdate = col_date(format = ""),
##   GSMtime = col_time(format = ""),
##   RTCdate2 = col_date(format = ""),
##   RTCtime2 = col_time(format = "")
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 17241 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual    file                                    expected   <int> <chr> <chr>      <chr>     <chr>                                   actual 1   167 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0021/… file 2   168 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0021/… row 3   169 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0021/… col 4   170 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0021/… expected 5   171 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0021/…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
## [1] "0023"
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   GasSN = col_character(),
##   Temperature = col_double(),
##   RH = col_double(),
##   ODINsn = col_character(),
##   RTCdate = col_date(format = ""),
##   RTCtime = col_time(format = ""),
##   GSMdate = col_date(format = ""),
##   GSMtime = col_time(format = ""),
##   RTCdate2 = col_date(format = ""),
##   RTCtime2 = col_time(format = "")
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 42993 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual    file                                    expected   <int> <chr> <chr>      <chr>     <chr>                                   actual 1   134 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0023/… file 2   236 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0023/… row 3   237 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0023/… col 4   238 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0023/… expected 5   239 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0023/…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
## [1] "0025"
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   GasSN = col_character(),
##   Temperature = col_double(),
##   RH = col_double(),
##   ODINsn = col_character(),
##   RTCdate = col_date(format = ""),
##   RTCtime = col_time(format = ""),
##   GSMdate = col_date(format = ""),
##   GSMtime = col_time(format = ""),
##   RTCdate2 = col_date(format = ""),
##   RTCtime2 = col_time(format = "")
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 17351 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual    file                                    expected   <int> <chr> <chr>      <chr>     <chr>                                   actual 1    43 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0025/… file 2    44 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0025/… row 3    45 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0025/… col 4    46 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0025/… expected 5    47 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0025/…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
## [1] "0026"
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   GasSN = col_character(),
##   Temperature = col_double(),
##   RH = col_double(),
##   ODINsn = col_character(),
##   RTCdate = col_date(format = ""),
##   RTCtime = col_time(format = ""),
##   GSMdate = col_date(format = ""),
##   GSMtime = col_time(format = ""),
##   RTCdate2 = col_date(format = ""),
##   RTCtime2 = col_time(format = "")
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 17398 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual    file                                    expected   <int> <chr> <chr>      <chr>     <chr>                                   actual 1     5 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0026/… file 2     6 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0026/… row 3     7 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0026/… col 4     8 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0026/… expected 5     9 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0026/…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
## [1] "0027"
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   GasSN = col_character(),
##   Temperature = col_double(),
##   RH = col_double(),
##   ODINsn = col_character(),
##   RTCdate = col_date(format = ""),
##   RTCtime = col_time(format = ""),
##   GSMdate = col_date(format = ""),
##   GSMtime = col_time(format = ""),
##   RTCdate2 = col_date(format = ""),
##   RTCtime2 = col_time(format = "")
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 16666 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual    file                                    expected   <int> <chr> <chr>      <chr>     <chr>                                   actual 1   230 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0027/… file 2   339 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0027/… row 3   340 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0027/… col 4   341 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0027/… expected 5   342 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0027/…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
## [1] "0028"
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   GasSN = col_character(),
##   Temperature = col_double(),
##   RH = col_double(),
##   ODINsn = col_character(),
##   RTCdate = col_date(format = ""),
##   RTCtime = col_time(format = ""),
##   GSMdate = col_date(format = ""),
##   GSMtime = col_time(format = ""),
##   RTCdate2 = col_date(format = ""),
##   RTCtime2 = col_time(format = "")
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 17391 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual    file                                    expected   <int> <chr> <chr>      <chr>     <chr>                                   actual 1    10 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0028/… file 2    11 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0028/… row 3    12 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0028/… col 4    13 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0028/… expected 5    14 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0028/…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
## [1] "0029"
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   GasSN = col_character(),
##   Temperature = col_double(),
##   RH = col_double(),
##   ODINsn = col_character(),
##   RTCdate = col_date(format = ""),
##   RTCtime = col_time(format = ""),
##   GSMdate = col_date(format = ""),
##   GSMtime = col_time(format = ""),
##   RTCdate2 = col_date(format = ""),
##   RTCtime2 = col_time(format = "")
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 16866 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual    file                                    expected   <int> <chr> <chr>      <chr>     <chr>                                   actual 1   531 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0029/… file 2   532 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0029/… row 3   533 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0029/… col 4   534 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0029/… expected 5   535 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0029/…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
## [1] "0031"
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   GasSN = col_character(),
##   Temperature = col_double(),
##   RH = col_double(),
##   ODINid = col_character(),
##   ODINsn = col_character(),
##   RTCdate = col_date(format = ""),
##   RTCtime = col_time(format = ""),
##   GSMdate = col_date(format = ""),
##   GSMtime = col_time(format = ""),
##   RTCdate2 = col_date(format = ""),
##   RTCtime2 = col_time(format = "")
## )
## See spec(...) for full column specifications.
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 4465 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual    file                                    expected   <int> <chr> <chr>      <chr>     <chr>                                   actual 1   562 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0031/… file 2  1071 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0031/… row 3  1379 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0031/… col 4  2197 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0031/… expected 5  2282 <NA>  21 columns 18 colum… '~/data/ODIN_SD/Gisborne/WORKING/0031/…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.

Get device locations

# Read the tab-separated table of device serial numbers and coordinates
odin_locations <- readr::read_delim(
  file = "odin_locations.txt",
  delim = "\t",
  escape_double = FALSE,
  trim_ws = TRUE
)
## Parsed with column specification:
## cols(
##   serialn = col_character(),
##   lat = col_double(),
##   lon = col_double()
## )
# Attach the coordinates of each device to the raw and time-averaged records.
# Records are matched on the unit number: characters 3-6 of `ODINsn`
# (e.g. "SD0025") against characters 7 onwards of `serialn`
# (e.g. "ODINSD0025"). Records from units that are not listed in
# `odin_locations` keep the initial value lon = lat = 0.
nsites <- length(odin_locations$serialn)
all.data$lon <- 0
all.data$lat <- 0
all.data.tavg$lon <- 0
all.data.tavg$lat <- 0
# The unit number of a record does not depend on the site being processed,
# so extract it once instead of on every iteration
unit_id <- substr(all.data$ODINsn, 3, 6)
unit_id.tavg <- substr(all.data.tavg$ODINsn, 3, 6)
# seq_len() yields an empty sequence when there are no sites (1:0 would not)
for (j in seq_len(nsites)) {
  print(odin_locations$serialn[j])
  site_id <- substr(odin_locations$serialn[j], 7, 11)
  loc_id <- (unit_id == site_id)
  all.data$lon[loc_id] <- odin_locations$lon[j]
  all.data$lat[loc_id] <- odin_locations$lat[j]
  loc_id.tavg <- (unit_id.tavg == site_id)
  all.data.tavg$lon[loc_id.tavg] <- odin_locations$lon[j]
  all.data.tavg$lat[loc_id.tavg] <- odin_locations$lat[j]
}
## [1] "ODINSD0025"
## [1] "ODINSD0023"
## [1] "ODINSD0026"
## [1] "ODINSD0016"
## [1] "ODINSD0011"
## [1] "ODINSD0031"
## [1] "ODINSD0029"
## [1] "ODINSD0028"
## [1] "ODINSD0009"
## [1] "ODINSD0018"
## [1] "ODINSD0014"
## [1] "ODINSD0017"
## [1] "ODINSD0027"
## [1] "ODINSD0008"
## [1] "ODINSD0010"
## [1] "ODINSD0021"
# Subset for the campaign
# Full campaign window, kept for reference only (not used for the subset below)
campaign_start <- as.POSIXct("2018-06-22 12:00:00", tz = "UTC")
campaign_end <- as.POSIXct("2018-08-16 12:00:00", tz = "UTC")
# Window actually analysed. These are the values that were in effect before,
# because they overwrote the full campaign dates.
start_date <- as.POSIXct("2018-07-10 00:00:00", tz = "UTC")
end_date <- as.POSIXct("2018-07-15 00:00:00", tz = "UTC")
# Length of the analysed window in minutes
nminutes <- difftime(end_date, start_date, units = "mins")
# Keep only the records inside the analysed window (both ends inclusive)
all.data <- subset(all.data, (date >= start_date) & (date <= end_date))
all.data.tavg <- subset(
  all.data.tavg,
  (date >= start_date) & (date <= end_date)
)

Summary statistics

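Note that the statistics are calculated on the time-averaged data (averaging period `tavg`, set to 1 hour above). The campaign went from June 22nd 12:00 UTC until August 16th 12:00 UTC (55 days in total), but the statistics below only cover the period selected above (July 10th 00:00 UTC until July 15th 00:00 UTC).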

The units for the parameters are:

* PM2.5 [\(\mu\)g/m\(^3\)]
* PM10 [\(\mu\)g/m\(^3\)]
* Temperature [°C]
* RH [%]

# Calculate the summary table for each unit
# One row per ODIN unit; the formula interface of aggregate() drops records
# with a missing value in any of the four parameters.
summary_formula <- cbind(Temperature, RH, PM2.5, PM10) ~ ODINsn
summary_mean <- aggregate(summary_formula, data = all.data.tavg, FUN = mean)
summary_max <- aggregate(summary_formula, data = all.data.tavg, FUN = max)
summary_min <- aggregate(summary_formula, data = all.data.tavg, FUN = min)
summary_sd <- aggregate(summary_formula, data = all.data.tavg, FUN = sd)
summary_N <- aggregate(summary_formula, data = all.data.tavg, FUN = length)
# Data availability: records present as a percentage of the records expected
# in the analysed window. The expected count follows the averaging period
# `tavg` rather than a hard-coded 10 minutes, which made complete hourly
# series show up as 17% available.
# NOTE(review): assumes all.data.tavg was averaged with `tavg` -- confirm.
n_expected <- length(seq(start_date, end_date, by = tavg))
summary_pct <- summary_N
summary_pct[, 2:5] <- format(
  100 * (summary_N[, 2:5] / n_expected),
  digits = 2
)

Average concentrations

# Per-unit means, formatted with one significant digit
print(format(summary_mean, digits = 1))
##    ODINsn Temperature RH PM2.5 PM10
## 1  SD0005          10 77  10.7 10.8
## 2  SD0008          11 73  25.3 36.3
## 3  SD0009          11 72  24.2 30.7
## 4  SD0010          12 69  49.4 67.0
## 5  SD0011          11 76  30.6 36.6
## 6  SD0014          10 75  29.4 41.9
## 7  SD0016          12 71  29.4 36.0
## 8  SD0017          11 73  37.4 44.3
## 9  SD0018          11 73  35.6 46.4
## 10 SD0021          11 73  29.5 36.7
## 11 SD0023          12 72  14.2 19.3
## 12 SD0025          11 75   7.7 10.7
## 13 SD0026          12 68  25.6 33.9
## 14 SD0027          11 75  22.5 32.1
## 15 SD0029          11 74  32.1 42.0
## 16 SD0031          11 76  20.6 25.7
## 17 SN0028          10 75   0.7  0.9

Maximum concentrations

# Per-unit maxima, formatted with one significant digit
print(format(summary_max, digits = 1))
##    ODINsn Temperature RH PM2.5 PM10
## 1  SD0005          24 95    52   52
## 2  SD0008          25 93   166  236
## 3  SD0009          26 92   109  134
## 4  SD0010          29 89   278  390
## 5  SD0011          23 93   192  245
## 6  SD0014          22 93   147  195
## 7  SD0016          27 93   180  228
## 8  SD0017          24 92   247  312
## 9  SD0018          25 92   171  219
## 10 SD0021          26 94   189  265
## 11 SD0023          28 92    75   97
## 12 SD0025          27 94    52   70
## 13 SD0026          29 90   182  233
## 14 SD0027          24 96   158  222
## 15 SD0029          26 93   158  233
## 16 SD0031          24 95   119  153
## 17 SN0028          23 94     5    5

Minimum concentrations

# Per-unit minima, formatted with one significant digit
print(format(summary_min, digits = 1))
##    ODINsn Temperature RH PM2.5 PM10
## 1  SD0005         0.5 35  5.00 5.00
## 2  SD0008         2.1 29  0.00 0.08
## 3  SD0009         1.0 30  0.00 0.25
## 4  SD0010         1.9 28  0.82 1.98
## 5  SD0011         1.7 36  0.00 0.00
## 6  SD0014         0.7 41  0.42 0.50
## 7  SD0016         2.5 29  0.08 0.25
## 8  SD0017         2.2 35  0.00 0.08
## 9  SD0018         1.6 34  0.00 0.33
## 10 SD0021         1.4 31  0.00 0.00
## 11 SD0023         1.7 30  0.00 0.33
## 12 SD0025         2.4 30  0.00 0.08
## 13 SD0026         3.7 27  0.33 1.08
## 14 SD0027         2.6 31  0.00 0.08
## 15 SD0029         1.7 33  0.00 0.08
## 16 SD0031         2.0 36  0.00 0.08
## 17 SN0028         0.7 37  0.00 0.00

Standard deviation

# Per-unit standard deviations, formatted with one significant digit
print(format(summary_sd, digits = 1))
##    ODINsn Temperature RH PM2.5 PM10
## 1  SD0005           6 18    10   11
## 2  SD0008           6 18    39   55
## 3  SD0009           7 19    26   32
## 4  SD0010           6 18    66   90
## 5  SD0011           5 16    38   48
## 6  SD0014           5 16    33   47
## 7  SD0016           6 19    41   52
## 8  SD0017           5 17    49   60
## 9  SD0018           6 18    38   49
## 10 SD0021           6 19    35   48
## 11 SD0023           6 19    17   22
## 12 SD0025           6 19     8   11
## 13 SD0026           6 18    35   47
## 14 SD0027           5 18    34   47
## 15 SD0029           6 18    39   51
## 16 SD0031           5 17    27   34
## 17 SN0028           6 18     1    1

Data availability [%]

# Per-unit data availability in percent
print(format(summary_pct, digits = 1))
##    ODINsn Temperature RH PM2.5 PM10
## 1  SD0005          17 17    17   17
## 2  SD0008          17 17    17   17
## 3  SD0009          17 17    17   17
## 4  SD0010          17 17    17   17
## 5  SD0011          17 17    17   17
## 6  SD0014          17 17    17   17
## 7  SD0016          17 17    17   17
## 8  SD0017          17 17    17   17
## 9  SD0018          17 17    17   17
## 10 SD0021          17 17    17   17
## 11 SD0023          17 17    17   17
## 12 SD0025          17 17    17   17
## 13 SD0026          17 17    17   17
## 14 SD0027          17 17    17   17
## 15 SD0029          17 17    17   17
## 16 SD0031          17 17    17   17
## 17 SN0028          17 17    17   17

Malfunctioning ODIN

The previous summaries show that ODIN-0005 malfunctioned, so it is removed from the analysis. In addition, the QA process identified ODIN-0028 as malfunctioning, so that unit is also removed from the analysis.

# Drop the records of the two malfunctioning units from both datasets.
# Rows are filtered out directly rather than blanked to NA first; %in% never
# returns NA, so a missing ODINsn cannot break the selection.
# na.exclude() is kept so that any remaining row with a missing value is
# still dropped, as before.
remove_idx <- all.data$ODINsn %in% c("SD0005", "SN0028")
all.data <- na.exclude(all.data[!remove_idx, ])
remove_idx <- all.data.tavg$ODINsn %in% c("SD0005", "SN0028")
all.data.tavg <- na.exclude(all.data.tavg[!remove_idx, ])

Time series

PM2.5

# Time series of PM2.5, one coloured line per ODIN unit
ggplot(all.data.tavg, aes(x = date, y = PM2.5, colour = ODINsn)) +
  geom_line()

PM10

# Time series of PM10, one coloured line per ODIN unit
ggplot(all.data.tavg, aes(x = date, y = PM10, colour = ODINsn)) +
  geom_line()

Temperature

# Time series of temperature, one coloured line per ODIN unit
ggplot(all.data.tavg, aes(x = date, y = Temperature, colour = ODINsn)) +
  geom_line()

Relative Humidity

# Time series of relative humidity, one coloured line per ODIN unit
ggplot(all.data.tavg, aes(x = date, y = RH, colour = ODINsn)) +
  geom_line()

Average Maps

# Coordinate reference systems: EPSG:2193 and EPSG:4326
proj4string_NZTM <- CRS("+init=epsg:2193")
proj4string_latlon <- CRS("+init=epsg:4326")
# Per-unit means of the parameters and of the site position, turned into a
# spatial object by declaring lon/lat as its coordinates
summary_mean_map <- aggregate(
  cbind(Temperature, RH, PM2.5, PM10, lon, lat) ~ ODINsn,
  data = all.data.tavg,
  FUN = mean
)
coordinates(summary_mean_map) <- ~ lon + lat
proj4string(summary_mean_map) <- proj4string_latlon

# Get the basemap
# Centre the map on the mean site position. The positions are read through
# coordinates() because `coordinates<-` turned the lon/lat columns into the
# coordinate matrix of the spatial object.
site_coords <- coordinates(summary_mean_map)
centre_lat <- mean(site_coords[, "lat"])
centre_lon <- mean(site_coords[, "lon"])
# NOTE(review): an API key is hard-coded below. It can now be overridden with
# the GGMAP_GOOGLE_API_KEY environment variable; the literal is only kept as a
# fallback so the script behaves as before. Consider rotating the key and
# removing it from the source.
ca <- get_googlemap(
  c(lon = centre_lon, lat = centre_lat),
  zoom = 13,
  scale = 2,
  color = "bw",
  key = Sys.getenv(
    "GGMAP_GOOGLE_API_KEY",
    unset = "AIzaSyACi3pNvPQTxZWx5u0nTtke598dPqdgySg"
  )
)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=-38.661357,178.019815&zoom=13&size=640x640&scale=2&maptype=terrain&sensor=false&key=AIzaSyACi3pNvPQTxZWx5u0nTtke598dPqdgySg

PM2.5

# Mean PM2.5 per site on the basemap; the colour scale runs from 0 (white)
# to the largest site mean (red)
ggmap(ca) +
  geom_point(
    data = as.data.frame(summary_mean_map),
    mapping = aes(x = lon, y = lat, colour = PM2.5),
    size = 5
  ) +
  scale_colour_continuous(
    name = "PM2.5",
    low = "white",
    high = "red",
    limits = c(0, max(summary_mean_map$PM2.5)),
    oob = squish
  )

PM10

# Mean PM10 per site on the basemap; the colour scale runs from 0 (white)
# to the largest site mean (red)
ggmap(ca) +
  geom_point(
    data = as.data.frame(summary_mean_map),
    mapping = aes(x = lon, y = lat, colour = PM10),
    size = 5
  ) +
  scale_colour_continuous(
    name = "PM10",
    low = "white",
    high = "red",
    limits = c(0, max(summary_mean_map$PM10)),
    oob = squish
  )

PMcoarse

# Mean coarse fraction (PM10 minus PM2.5) per site on the basemap; the colour
# scale runs from 0 (white) to the largest site difference (red)
ggmap(ca) +
  geom_point(
    data = as.data.frame(summary_mean_map),
    mapping = aes(x = lon, y = lat, colour = PM10 - PM2.5),
    size = 5
  ) +
  scale_colour_continuous(
    name = "PMcoarse",
    low = "white",
    high = "red",
    limits = c(0, max(summary_mean_map$PM10 - summary_mean_map$PM2.5)),
    oob = squish
  )

# Persist the cleaned raw and time-averaged datasets as RData files
save(all.data, file = 'alldata.RData')
save(all.data.tavg, file = 'alldataTAVG.RData')
# Export the main parameters of the time-averaged data as a CSV file
data.output <- all.data.tavg[
  ,
  c('date', 'PM2.5', 'PM10', 'Temperature', 'RH', 'ODINsn')
]
write_excel_csv(data.output, './gisborne_10min.csv')