This script parses and cleans the ODIN-SD data from their memory cards to then calculate summary statistics and plot draft maps of PM10 and PM2.5

Prepare libraries

library(librarian) # To more flexibly manage packages
shelf(readr,
      openair,
      automap,
      raster,
      gstat,
      sp,
      rgdal,
      ggmap,
      ggplot2,
      scales)
## Warning in shelf(readr, openair, automap, raster, gstat, sp, rgdal, ggmap, : cran_repo = '@CRAN@' is not a valid URL. 
##                     Defaulting to cran_repo = 'https://cran.r-project.org'.
## Loading required package: sp
## rgdal: version: 1.3-4, (SVN revision 766)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 1.11.4, released 2016/01/25
##  Path to GDAL shared files: /usr/share/gdal
##  GDAL binary built with GEOS: TRUE 
##  Loaded PROJ.4 runtime: Rel. 4.8.0, 6 March 2012, [PJ_VERSION: 480]
##  Path to PROJ.4 shared files: (autodetected)
##  Linking to sp version: 1.3-1
## Loading required package: ggplot2
## 
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
## 
##     col_factor

Set constants

data_path <- path.expand("~/data/ODIN_IDAHO/2019/RAW/FromPortal/")
files_list <- dir(data_path,pattern = 'SD')
# Define time average for output
tavg <- '1 hour'

Load data

Get devices locations

odin_locations <- readr::read_delim(paste0(data_path,"odin_locations.txt"), 
                                    "\t", escape_double = FALSE, trim_ws = TRUE)
## Parsed with column specification:
## cols(
##   ODIN = col_character(),
##   lat = col_double(),
##   lon = col_double()
## )

Cycle through the folders to work with all the DATA.TXT files

for (i in (1:length(files_list))){
  file <- files_list[i]
  print(file)
  odin.data <- readr::read_delim(paste0(data_path,file),
                                 delim = ',',
                                 skip = 1,
                                 col_names = c('date',
                                               'PM1',
                                               'PM2.5',
                                               'PM10',
                                               'Temperature',
                                               'RH'))
  # Add serial number to the dataframe
  odin.data$ODINsn <- substr(file,1,6)
  # Add location to the dataframe
  # Find the location of the current ODIN
  odin_id <- which(odin_locations$ODIN == substr(file,1,6))
  odin.data$lat <- odin_locations$lat[odin_id]
  odin.data$lon <- -odin_locations$lon[odin_id]

  # Construct the ALLDATA frame
  
  if (i == 1){
    # This is the first iteration so we just copy the "odin.data" dataframe
    all.data <- odin.data
    all.data.tavg <- timeAverage(odin.data,avg.time = tavg)
    all.data.tavg$ODINsn <- odin.data$ODINsn[1]
  } else {
    # We already have "all.data" so we need to append the current "odin.data"
    all.data <- rbind(all.data,odin.data)
    tmp1 <- timeAverage(odin.data,avg.time = tavg)
    tmp1$ODINsn <- odin.data$ODINsn[1]
    all.data.tavg <- rbind(all.data.tavg,tmp1)
    # Remove all.data to clean for next iteration
    rm(odin.data)
  }
}
## [1] "SD0006_20190129160100.csv"
## Parsed with column specification:
## cols(
##   date = col_datetime(format = ""),
##   PM1 = col_double(),
##   PM2.5 = col_double(),
##   PM10 = col_double(),
##   Temperature = col_double(),
##   RH = col_double()
## )
## [1] "SD0015_20190201201300.csv"
## Parsed with column specification:
## cols(
##   date = col_datetime(format = ""),
##   PM1 = col_double(),
##   PM2.5 = col_double(),
##   PM10 = col_double(),
##   Temperature = col_double(),
##   RH = col_double()
## )
## [1] "SD0032_20190131214700.csv"
## Parsed with column specification:
## cols(
##   date = col_datetime(format = ""),
##   PM1 = col_double(),
##   PM2.5 = col_double(),
##   PM10 = col_double(),
##   Temperature = col_double(),
##   RH = col_double()
## )
## [1] "SD0033_20190131211200.csv"
## Parsed with column specification:
## cols(
##   date = col_datetime(format = ""),
##   PM1 = col_double(),
##   PM2.5 = col_double(),
##   PM10 = col_double(),
##   Temperature = col_double(),
##   RH = col_double()
## )
## [1] "SD0039_20190131204500.csv"
## Parsed with column specification:
## cols(
##   date = col_datetime(format = ""),
##   PM1 = col_double(),
##   PM2.5 = col_double(),
##   PM10 = col_double(),
##   Temperature = col_double(),
##   RH = col_double()
## )
## [1] "SD0041_20190201192300.csv"
## Parsed with column specification:
## cols(
##   date = col_datetime(format = ""),
##   PM1 = col_double(),
##   PM2.5 = col_double(),
##   PM10 = col_double(),
##   Temperature = col_double(),
##   RH = col_double()
## )
## [1] "SD0044_20190201161900.csv"
## Parsed with column specification:
## cols(
##   date = col_datetime(format = ""),
##   PM1 = col_double(),
##   PM2.5 = col_double(),
##   PM10 = col_double(),
##   Temperature = col_double(),
##   RH = col_double()
## )
## [1] "SD0044_20190201181900.csv"
## Parsed with column specification:
## cols(
##   date = col_datetime(format = ""),
##   PM1 = col_double(),
##   PM2.5 = col_double(),
##   PM10 = col_double(),
##   Temperature = col_double(),
##   RH = col_double()
## )
## [1] "SD0045_20190201185800.csv"
## Parsed with column specification:
## cols(
##   date = col_datetime(format = ""),
##   PM1 = col_double(),
##   PM2.5 = col_double(),
##   PM10 = col_double(),
##   Temperature = col_double(),
##   RH = col_double()
## )
## [1] "SD0045_20190201205800.csv"
## Parsed with column specification:
## cols(
##   date = col_datetime(format = ""),
##   PM1 = col_double(),
##   PM2.5 = col_double(),
##   PM10 = col_double(),
##   Temperature = col_double(),
##   RH = col_double()
## )
## [1] "SD0048_20190131213900.csv"
## Parsed with column specification:
## cols(
##   date = col_datetime(format = ""),
##   PM1 = col_double(),
##   PM2.5 = col_double(),
##   PM10 = col_double(),
##   Temperature = col_double(),
##   RH = col_double()
## )
## [1] "SD0050_20190201145500.csv"
## Parsed with column specification:
## cols(
##   date = col_datetime(format = ""),
##   PM1 = col_double(),
##   PM2.5 = col_double(),
##   PM10 = col_double(),
##   Temperature = col_double(),
##   RH = col_double()
## )
## [1] "SD0056_20190201135000.csv"
## Parsed with column specification:
## cols(
##   date = col_datetime(format = ""),
##   PM1 = col_double(),
##   PM2.5 = col_double(),
##   PM10 = col_double(),
##   Temperature = col_double(),
##   RH = col_double()
## )
## [1] "SD0060_20190131195700.csv"
## Parsed with column specification:
## cols(
##   date = col_datetime(format = ""),
##   PM1 = col_double(),
##   PM2.5 = col_double(),
##   PM10 = col_double(),
##   Temperature = col_double(),
##   RH = col_double()
## )
## [1] "SD0062_20190201160800.csv"
## Parsed with column specification:
## cols(
##   date = col_datetime(format = ""),
##   PM1 = col_double(),
##   PM2.5 = col_double(),
##   PM10 = col_double(),
##   Temperature = col_double(),
##   RH = col_double()
## )
## [1] "SD0062_20190201180800.csv"
## Parsed with column specification:
## cols(
##   date = col_datetime(format = ""),
##   PM1 = col_double(),
##   PM2.5 = col_double(),
##   PM10 = col_double(),
##   Temperature = col_double(),
##   RH = col_double()
## )
## [1] "SD0065_20190201163700.csv"
## Parsed with column specification:
## cols(
##   date = col_datetime(format = ""),
##   PM1 = col_double(),
##   PM2.5 = col_double(),
##   PM10 = col_double(),
##   Temperature = col_double(),
##   RH = col_double()
## )
## [1] "SD0065_20190201183700.csv"
## Parsed with column specification:
## cols(
##   date = col_datetime(format = ""),
##   PM1 = col_double(),
##   PM2.5 = col_double(),
##   PM10 = col_double(),
##   Temperature = col_double(),
##   RH = col_double()
## )
## [1] "SD0067_20190201172300.csv"
## Parsed with column specification:
## cols(
##   date = col_datetime(format = ""),
##   PM1 = col_double(),
##   PM2.5 = col_double(),
##   PM10 = col_double(),
##   Temperature = col_double(),
##   RH = col_double()
## )

Summary statistics

Note that the statistics are calculated on 10 minutes averages and that the campaign went from January 29th 16:00 UTC until March 1st 15:50 UTC

The units for the parameters are: * PM2.5 [\(mu\)g/m3] * PM10 [\(mu\)g/m3] * Temperature [celsius] * RH [%]

# Calculate the summary table for each unit
summary_mean <- aggregate(cbind(Temperature, RH,PM2.5, PM10) ~ODINsn, all.data, FUN = mean)
summary_max <- aggregate(cbind(Temperature, RH,PM2.5, PM10) ~ODINsn, all.data, FUN = max)
summary_min <- aggregate(cbind(Temperature, RH,PM2.5, PM10) ~ODINsn, all.data, FUN = min)
summary_sd <- aggregate(cbind(Temperature, RH,PM2.5, PM10) ~ODINsn, all.data, FUN = sd)
summary_N <- aggregate(cbind(Temperature, RH,PM2.5, PM10) ~ODINsn, all.data, FUN = length)

Average concentrations

print(format(summary_mean,digits = 1))
##    ODINsn Temperature RH PM2.5 PM10
## 1  SD0006        -3.4 63   4.8    5
## 2  SD0015        -0.6 65   9.0   12
## 3  SD0032        -2.0 69   1.6    2
## 4  SD0033        -1.0 67   2.4    4
## 5  SD0039        -2.2 71   0.9    1
## 6  SD0041        -1.5 68   4.0    4
## 7  SD0044        -0.9 67   4.8    7
## 8  SD0045        -1.1 69   2.6    4
## 9  SD0048        -1.9 69   4.3    6
## 10 SD0050        -0.2 65   3.4    4
## 11 SD0056         1.5 63   2.1    2
## 12 SD0060        -1.5 69  10.8   17
## 13 SD0062        -0.7 67   3.3    4
## 14 SD0065         0.7 62   1.8    2
## 15 SD0067        -2.2 73   9.6   12

Maximum concentrations

print(format(summary_max,digits = 1))
##    ODINsn Temperature RH PM2.5 PM10
## 1  SD0006          22 89    35   40
## 2  SD0015          22 93    70  121
## 3  SD0032          18 95    17   25
## 4  SD0033          22 93    23   95
## 5  SD0039          15 94    27   44
## 6  SD0041          22 95    97  117
## 7  SD0044          24 91    84  146
## 8  SD0045          30 94    61   98
## 9  SD0048          15 93    29   38
## 10 SD0050          26 89   108  220
## 11 SD0056          22 87    17   20
## 12 SD0060          14 94    97  170
## 13 SD0062          28 91    32   50
## 14 SD0065          41 89    84  119
## 15 SD0067          18 94   110  134

Minimum concentrations

print(format(summary_min,digits = 1))
##    ODINsn Temperature RH PM2.5 PM10
## 1  SD0006         -18 18     0    0
## 2  SD0015         -15 19     0    0
## 3  SD0032         -18 21     0    0
## 4  SD0033         -17 15     0    0
## 5  SD0039         -15 19     0    0
## 6  SD0041         -16 18     0    0
## 7  SD0044         -14 13     0    0
## 8  SD0045         -16 12     0    0
## 9  SD0048         -16 26     0    0
## 10 SD0050         -14 14     0    0
## 11 SD0056         -10 14     0    0
## 12 SD0060         -17 23     0    0
## 13 SD0062         -15 14     0    0
## 14 SD0065         -14  6     0    0
## 15 SD0067         -19 15     0    0

Standard deviation

print(format(summary_sd,digits = 1))
##    ODINsn Temperature RH PM2.5 PM10
## 1  SD0006           8 16     6    6
## 2  SD0015           6 19     9   12
## 3  SD0032           6 16     2    3
## 4  SD0033           7 18     3    4
## 5  SD0039           5 15     1    2
## 6  SD0041           6 18     5    5
## 7  SD0044           7 17     6    8
## 8  SD0045           7 20     4    6
## 9  SD0048           5 15     4    5
## 10 SD0050           6 18     7    9
## 11 SD0056           6 18     3    4
## 12 SD0060           5 15    13   21
## 13 SD0062           7 18     4    5
## 14 SD0065           8 20     4    5
## 15 SD0067           6 15    12   16

Time series

PM2.5

ggplot(data = all.data, aes(x=date)) +
  geom_line(aes(y=PM2.5,colour = ODINsn))

PM10

ggplot(data = all.data, aes(x=date)) +
  geom_line(aes(y=PM10,colour = ODINsn))

Temperature

ggplot(data = all.data, aes(x=date)) +
  geom_line(aes(y=Temperature,colour = ODINsn))

Relative Humidity

ggplot(data = all.data, aes(x=date)) +
  geom_line(aes(y=RH,colour = ODINsn))

ggplot(data = all.data, aes(x=RH)) +
  geom_point(aes(y=PM2.5,colour = ODINsn))

ggplot(data = all.data, aes(x=Temperature)) +
  geom_point(aes(y=PM2.5,colour = ODINsn))

Average Maps

# Some useful constants
proj4string_NZTM <- CRS('+init=epsg:2193')
proj4string_latlon <- CRS('+init=epsg:4326')
# Calculate the summaries for cold and warm periods
temperature_threshold <- 5
summary_mean_map_cold <- aggregate(cbind(Temperature, RH, PM2.5, PM10, lon, lat) ~ODINsn,
                                   subset(all.data,Temperature < temperature_threshold),
                                   FUN = mean)
summary_mean_map_warm <- aggregate(cbind(Temperature, RH, PM2.5, PM10, lon, lat) ~ODINsn,
                                   subset(all.data,Temperature >= temperature_threshold),
                                   FUN = mean)

COLD ( < 5 C)

# We need to remove the ODINs not in Kellogg
summary_mean_map_cold <- subset(summary_mean_map_cold,ODINsn != "SD0006")
summary_mean_map_cold <- subset(summary_mean_map_cold,ODINsn != "SD0067")
summary_mean_map_cold <- subset(summary_mean_map_cold,ODINsn != "SD0060")
# Assign coordinates to the dataframe
coordinates(summary_mean_map_cold) <- ~ lon + lat
proj4string(summary_mean_map_cold) <- proj4string_latlon

# Get the basemap
centre_lat <- mean(summary_mean_map_cold$lat)
centre_lon <- mean(summary_mean_map_cold$lon)
ca <- get_googlemap(
  c(lon=centre_lon,lat=centre_lat),
  zoom=15,
  scale=2,
  color="bw",
  key = "AIzaSyACi3pNvPQTxZWx5u0nTtke598dPqdgySg")
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=47.542917,-116.127083&zoom=15&size=640x640&scale=2&maptype=terrain&sensor=false&key=AIzaSyACi3pNvPQTxZWx5u0nTtke598dPqdgySg

PM2.5

ggmap(ca) + 
  geom_point(data=as.data.frame(summary_mean_map_cold),aes(x=lon,y=lat,colour = PM2.5),size = 5) +
  scale_colour_continuous(low="white", high="red",limits=c(0, max(summary_mean_map_cold$PM2.5)),
                          name = "PM2.5", oob=squish)

PM10

ggmap(ca) + 
  geom_point(data=as.data.frame(summary_mean_map_cold),aes(x=lon,y=lat,colour = PM10),size = 5) +
  scale_colour_continuous(low="white", high="red",limits=c(0, max(summary_mean_map_cold$PM10)),
                          name = "PM10", oob=squish)

PMcoarse

ggmap(ca) + 
  geom_point(data=as.data.frame(summary_mean_map_cold),aes(x=lon,y=lat,colour = PM10 - PM2.5),size = 5) +
  scale_colour_continuous(low="white", high="red",limits=c(0, max(summary_mean_map_cold$PM10 - summary_mean_map_cold$PM2.5)),
                          name = "PMcoarse", oob=squish)

WARM ( <= 5 C)

# We need to remove the ODINs not in Kellogg
summary_mean_map_warm <- subset(summary_mean_map_warm,ODINsn != "SD0006")
summary_mean_map_warm <- subset(summary_mean_map_warm,ODINsn != "SD0067")
summary_mean_map_warm <- subset(summary_mean_map_warm,ODINsn != "SD0060")
# Assign coordinates to the dataframe
coordinates(summary_mean_map_warm) <- ~ lon + lat
proj4string(summary_mean_map_warm) <- proj4string_latlon

# Get the basemap
centre_lat <- mean(summary_mean_map_warm$lat)
centre_lon <- mean(summary_mean_map_warm$lon)
ca <- get_googlemap(
  c(lon=centre_lon,lat=centre_lat),
  zoom=15,
  scale=2,
  color="bw",
  key = "AIzaSyACi3pNvPQTxZWx5u0nTtke598dPqdgySg")
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=47.542917,-116.127083&zoom=15&size=640x640&scale=2&maptype=terrain&sensor=false&key=AIzaSyACi3pNvPQTxZWx5u0nTtke598dPqdgySg

PM2.5

ggmap(ca) + 
  geom_point(data=as.data.frame(summary_mean_map_warm),aes(x=lon,y=lat,colour = PM2.5),size = 5) +
  scale_colour_continuous(low="white", high="red",limits=c(0, max(summary_mean_map_warm$PM2.5)),
                          name = "PM2.5", oob=squish)

PM10

ggmap(ca) + 
  geom_point(data=as.data.frame(summary_mean_map_warm),aes(x=lon,y=lat,colour = PM10),size = 5) +
  scale_colour_continuous(low="white", high="red",limits=c(0, max(summary_mean_map_warm$PM10)),
                          name = "PM10", oob=squish)

PMcoarse

ggmap(ca) + 
  geom_point(data=as.data.frame(summary_mean_map_warm),aes(x=lon,y=lat,colour = PM10 - PM2.5),size = 5) +
  scale_colour_continuous(low="white", high="red",limits=c(0, max(summary_mean_map_warm$PM10 - summary_mean_map_warm$PM2.5)),
                          name = "PMcoarse", oob=squish)

# Save the "all.data" dataframe
save(all.data,file = 'alldata.RData')
save(all.data.tavg,file = 'alldataTAVG.RData')
data.output <- all.data.tavg[,c('date','PM2.5','PM10','Temperature','RH','ODINsn')]