This script parses and cleans the ODIN-SD data from their memory cards to then calculate summary statistics and plot draft maps of PM10 and PM2.5
library(librarian) # To more flexibly manage packages
shelf(readr,
openair,
automap,
raster,
gstat,
sp,
rgdal,
ggmap,
ggplot2,
scales)
## Warning in shelf(readr, openair, automap, raster, gstat, sp, rgdal, ggmap, : cran_repo = '@CRAN@' is not a valid URL.
## Defaulting to cran_repo = 'https://cran.r-project.org'.
## Loading required package: sp
## rgdal: version: 1.3-4, (SVN revision 766)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 1.11.4, released 2016/01/25
## Path to GDAL shared files: /usr/share/gdal
## GDAL binary built with GEOS: TRUE
## Loaded PROJ.4 runtime: Rel. 4.8.0, 6 March 2012, [PJ_VERSION: 480]
## Path to PROJ.4 shared files: (autodetected)
## Linking to sp version: 1.3-1
## Loading required package: ggplot2
##
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
##
## col_factor
data_path <- path.expand("~/data/ODIN_IDAHO/2019/RAW/FromPortal/")
files_list <- dir(data_path,pattern = 'SD')
# Define time average for output
tavg <- '1 hour'
Get devices locations
odin_locations <- readr::read_delim(paste0(data_path,"odin_locations.txt"),
"\t", escape_double = FALSE, trim_ws = TRUE)
## Parsed with column specification:
## cols(
## ODIN = col_character(),
## lat = col_double(),
## lon = col_double()
## )
Cycle through the folders to work with all the DATA.TXT files
for (i in (1:length(files_list))){
file <- files_list[i]
print(file)
odin.data <- readr::read_delim(paste0(data_path,file),
delim = ',',
skip = 1,
col_names = c('date',
'PM1',
'PM2.5',
'PM10',
'Temperature',
'RH'))
# Add serial number to the dataframe
odin.data$ODINsn <- substr(file,1,6)
# Add location to the dataframe
# Find the location of the current ODIN
odin_id <- which(odin_locations$ODIN == substr(file,1,6))
odin.data$lat <- odin_locations$lat[odin_id]
odin.data$lon <- -odin_locations$lon[odin_id]
# Construct the ALLDATA frame
if (i == 1){
# This is the first iteration so we just copy the "odin.data" dataframe
all.data <- odin.data
all.data.tavg <- timeAverage(odin.data,avg.time = tavg)
all.data.tavg$ODINsn <- odin.data$ODINsn[1]
} else {
# We already have "all.data" so we need to append the current "odin.data"
all.data <- rbind(all.data,odin.data)
tmp1 <- timeAverage(odin.data,avg.time = tavg)
tmp1$ODINsn <- odin.data$ODINsn[1]
all.data.tavg <- rbind(all.data.tavg,tmp1)
# Remove all.data to clean for next iteration
rm(odin.data)
}
}
## [1] "SD0006_20190129160100.csv"
## Parsed with column specification:
## cols(
## date = col_datetime(format = ""),
## PM1 = col_double(),
## PM2.5 = col_double(),
## PM10 = col_double(),
## Temperature = col_double(),
## RH = col_double()
## )
## [1] "SD0015_20190201201300.csv"
## Parsed with column specification:
## cols(
## date = col_datetime(format = ""),
## PM1 = col_double(),
## PM2.5 = col_double(),
## PM10 = col_double(),
## Temperature = col_double(),
## RH = col_double()
## )
## [1] "SD0032_20190131214700.csv"
## Parsed with column specification:
## cols(
## date = col_datetime(format = ""),
## PM1 = col_double(),
## PM2.5 = col_double(),
## PM10 = col_double(),
## Temperature = col_double(),
## RH = col_double()
## )
## [1] "SD0033_20190131211200.csv"
## Parsed with column specification:
## cols(
## date = col_datetime(format = ""),
## PM1 = col_double(),
## PM2.5 = col_double(),
## PM10 = col_double(),
## Temperature = col_double(),
## RH = col_double()
## )
## [1] "SD0039_20190131204500.csv"
## Parsed with column specification:
## cols(
## date = col_datetime(format = ""),
## PM1 = col_double(),
## PM2.5 = col_double(),
## PM10 = col_double(),
## Temperature = col_double(),
## RH = col_double()
## )
## [1] "SD0041_20190201192300.csv"
## Parsed with column specification:
## cols(
## date = col_datetime(format = ""),
## PM1 = col_double(),
## PM2.5 = col_double(),
## PM10 = col_double(),
## Temperature = col_double(),
## RH = col_double()
## )
## [1] "SD0044_20190201161900.csv"
## Parsed with column specification:
## cols(
## date = col_datetime(format = ""),
## PM1 = col_double(),
## PM2.5 = col_double(),
## PM10 = col_double(),
## Temperature = col_double(),
## RH = col_double()
## )
## [1] "SD0044_20190201181900.csv"
## Parsed with column specification:
## cols(
## date = col_datetime(format = ""),
## PM1 = col_double(),
## PM2.5 = col_double(),
## PM10 = col_double(),
## Temperature = col_double(),
## RH = col_double()
## )
## [1] "SD0045_20190201185800.csv"
## Parsed with column specification:
## cols(
## date = col_datetime(format = ""),
## PM1 = col_double(),
## PM2.5 = col_double(),
## PM10 = col_double(),
## Temperature = col_double(),
## RH = col_double()
## )
## [1] "SD0045_20190201205800.csv"
## Parsed with column specification:
## cols(
## date = col_datetime(format = ""),
## PM1 = col_double(),
## PM2.5 = col_double(),
## PM10 = col_double(),
## Temperature = col_double(),
## RH = col_double()
## )
## [1] "SD0048_20190131213900.csv"
## Parsed with column specification:
## cols(
## date = col_datetime(format = ""),
## PM1 = col_double(),
## PM2.5 = col_double(),
## PM10 = col_double(),
## Temperature = col_double(),
## RH = col_double()
## )
## [1] "SD0050_20190201145500.csv"
## Parsed with column specification:
## cols(
## date = col_datetime(format = ""),
## PM1 = col_double(),
## PM2.5 = col_double(),
## PM10 = col_double(),
## Temperature = col_double(),
## RH = col_double()
## )
## [1] "SD0056_20190201135000.csv"
## Parsed with column specification:
## cols(
## date = col_datetime(format = ""),
## PM1 = col_double(),
## PM2.5 = col_double(),
## PM10 = col_double(),
## Temperature = col_double(),
## RH = col_double()
## )
## [1] "SD0060_20190131195700.csv"
## Parsed with column specification:
## cols(
## date = col_datetime(format = ""),
## PM1 = col_double(),
## PM2.5 = col_double(),
## PM10 = col_double(),
## Temperature = col_double(),
## RH = col_double()
## )
## [1] "SD0062_20190201160800.csv"
## Parsed with column specification:
## cols(
## date = col_datetime(format = ""),
## PM1 = col_double(),
## PM2.5 = col_double(),
## PM10 = col_double(),
## Temperature = col_double(),
## RH = col_double()
## )
## [1] "SD0062_20190201180800.csv"
## Parsed with column specification:
## cols(
## date = col_datetime(format = ""),
## PM1 = col_double(),
## PM2.5 = col_double(),
## PM10 = col_double(),
## Temperature = col_double(),
## RH = col_double()
## )
## [1] "SD0065_20190201163700.csv"
## Parsed with column specification:
## cols(
## date = col_datetime(format = ""),
## PM1 = col_double(),
## PM2.5 = col_double(),
## PM10 = col_double(),
## Temperature = col_double(),
## RH = col_double()
## )
## [1] "SD0065_20190201183700.csv"
## Parsed with column specification:
## cols(
## date = col_datetime(format = ""),
## PM1 = col_double(),
## PM2.5 = col_double(),
## PM10 = col_double(),
## Temperature = col_double(),
## RH = col_double()
## )
## [1] "SD0067_20190201172300.csv"
## Parsed with column specification:
## cols(
## date = col_datetime(format = ""),
## PM1 = col_double(),
## PM2.5 = col_double(),
## PM10 = col_double(),
## Temperature = col_double(),
## RH = col_double()
## )
Note that the statistics are calculated on 10 minutes averages and that the campaign went from January 29th 16:00 UTC until March 1st 15:50 UTC
The units for the parameters are: * PM2.5 [\(mu\)g/m3] * PM10 [\(mu\)g/m3] * Temperature [celsius] * RH [%]
# Calculate the summary table for each unit
summary_mean <- aggregate(cbind(Temperature, RH,PM2.5, PM10) ~ODINsn, all.data, FUN = mean)
summary_max <- aggregate(cbind(Temperature, RH,PM2.5, PM10) ~ODINsn, all.data, FUN = max)
summary_min <- aggregate(cbind(Temperature, RH,PM2.5, PM10) ~ODINsn, all.data, FUN = min)
summary_sd <- aggregate(cbind(Temperature, RH,PM2.5, PM10) ~ODINsn, all.data, FUN = sd)
summary_N <- aggregate(cbind(Temperature, RH,PM2.5, PM10) ~ODINsn, all.data, FUN = length)
print(format(summary_mean,digits = 1))
## ODINsn Temperature RH PM2.5 PM10
## 1 SD0006 -3.4 63 4.8 5
## 2 SD0015 -0.6 65 9.0 12
## 3 SD0032 -2.0 69 1.6 2
## 4 SD0033 -1.0 67 2.4 4
## 5 SD0039 -2.2 71 0.9 1
## 6 SD0041 -1.5 68 4.0 4
## 7 SD0044 -0.9 67 4.8 7
## 8 SD0045 -1.1 69 2.6 4
## 9 SD0048 -1.9 69 4.3 6
## 10 SD0050 -0.2 65 3.4 4
## 11 SD0056 1.5 63 2.1 2
## 12 SD0060 -1.5 69 10.8 17
## 13 SD0062 -0.7 67 3.3 4
## 14 SD0065 0.7 62 1.8 2
## 15 SD0067 -2.2 73 9.6 12
print(format(summary_max,digits = 1))
## ODINsn Temperature RH PM2.5 PM10
## 1 SD0006 22 89 35 40
## 2 SD0015 22 93 70 121
## 3 SD0032 18 95 17 25
## 4 SD0033 22 93 23 95
## 5 SD0039 15 94 27 44
## 6 SD0041 22 95 97 117
## 7 SD0044 24 91 84 146
## 8 SD0045 30 94 61 98
## 9 SD0048 15 93 29 38
## 10 SD0050 26 89 108 220
## 11 SD0056 22 87 17 20
## 12 SD0060 14 94 97 170
## 13 SD0062 28 91 32 50
## 14 SD0065 41 89 84 119
## 15 SD0067 18 94 110 134
print(format(summary_min,digits = 1))
## ODINsn Temperature RH PM2.5 PM10
## 1 SD0006 -18 18 0 0
## 2 SD0015 -15 19 0 0
## 3 SD0032 -18 21 0 0
## 4 SD0033 -17 15 0 0
## 5 SD0039 -15 19 0 0
## 6 SD0041 -16 18 0 0
## 7 SD0044 -14 13 0 0
## 8 SD0045 -16 12 0 0
## 9 SD0048 -16 26 0 0
## 10 SD0050 -14 14 0 0
## 11 SD0056 -10 14 0 0
## 12 SD0060 -17 23 0 0
## 13 SD0062 -15 14 0 0
## 14 SD0065 -14 6 0 0
## 15 SD0067 -19 15 0 0
print(format(summary_sd,digits = 1))
## ODINsn Temperature RH PM2.5 PM10
## 1 SD0006 8 16 6 6
## 2 SD0015 6 19 9 12
## 3 SD0032 6 16 2 3
## 4 SD0033 7 18 3 4
## 5 SD0039 5 15 1 2
## 6 SD0041 6 18 5 5
## 7 SD0044 7 17 6 8
## 8 SD0045 7 20 4 6
## 9 SD0048 5 15 4 5
## 10 SD0050 6 18 7 9
## 11 SD0056 6 18 3 4
## 12 SD0060 5 15 13 21
## 13 SD0062 7 18 4 5
## 14 SD0065 8 20 4 5
## 15 SD0067 6 15 12 16
ggplot(data = all.data, aes(x=date)) +
geom_line(aes(y=PM2.5,colour = ODINsn))
ggplot(data = all.data, aes(x=date)) +
geom_line(aes(y=PM10,colour = ODINsn))
ggplot(data = all.data, aes(x=date)) +
geom_line(aes(y=Temperature,colour = ODINsn))
ggplot(data = all.data, aes(x=date)) +
geom_line(aes(y=RH,colour = ODINsn))
ggplot(data = all.data, aes(x=RH)) +
geom_point(aes(y=PM2.5,colour = ODINsn))
ggplot(data = all.data, aes(x=Temperature)) +
geom_point(aes(y=PM2.5,colour = ODINsn))
# Some useful constants
proj4string_NZTM <- CRS('+init=epsg:2193')
proj4string_latlon <- CRS('+init=epsg:4326')
# Calculate the summaries for cold and warm periods
temperature_threshold <- 5
summary_mean_map_cold <- aggregate(cbind(Temperature, RH, PM2.5, PM10, lon, lat) ~ODINsn,
subset(all.data,Temperature < temperature_threshold),
FUN = mean)
summary_mean_map_warm <- aggregate(cbind(Temperature, RH, PM2.5, PM10, lon, lat) ~ODINsn,
subset(all.data,Temperature >= temperature_threshold),
FUN = mean)
COLD ( < 5 C)
# We need to remove the ODINs not in Kellogg
summary_mean_map_cold <- subset(summary_mean_map_cold,ODINsn != "SD0006")
summary_mean_map_cold <- subset(summary_mean_map_cold,ODINsn != "SD0067")
summary_mean_map_cold <- subset(summary_mean_map_cold,ODINsn != "SD0060")
# Assign coordinates to the dataframe
coordinates(summary_mean_map_cold) <- ~ lon + lat
proj4string(summary_mean_map_cold) <- proj4string_latlon
# Get the basemap
centre_lat <- mean(summary_mean_map_cold$lat)
centre_lon <- mean(summary_mean_map_cold$lon)
ca <- get_googlemap(
c(lon=centre_lon,lat=centre_lat),
zoom=15,
scale=2,
color="bw",
key = "AIzaSyACi3pNvPQTxZWx5u0nTtke598dPqdgySg")
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=47.542917,-116.127083&zoom=15&size=640x640&scale=2&maptype=terrain&sensor=false&key=AIzaSyACi3pNvPQTxZWx5u0nTtke598dPqdgySg
ggmap(ca) +
geom_point(data=as.data.frame(summary_mean_map_cold),aes(x=lon,y=lat,colour = PM2.5),size = 5) +
scale_colour_continuous(low="white", high="red",limits=c(0, max(summary_mean_map_cold$PM2.5)),
name = "PM2.5", oob=squish)
ggmap(ca) +
geom_point(data=as.data.frame(summary_mean_map_cold),aes(x=lon,y=lat,colour = PM10),size = 5) +
scale_colour_continuous(low="white", high="red",limits=c(0, max(summary_mean_map_cold$PM10)),
name = "PM10", oob=squish)
ggmap(ca) +
geom_point(data=as.data.frame(summary_mean_map_cold),aes(x=lon,y=lat,colour = PM10 - PM2.5),size = 5) +
scale_colour_continuous(low="white", high="red",limits=c(0, max(summary_mean_map_cold$PM10 - summary_mean_map_cold$PM2.5)),
name = "PMcoarse", oob=squish)
WARM ( <= 5 C)
# We need to remove the ODINs not in Kellogg
summary_mean_map_warm <- subset(summary_mean_map_warm,ODINsn != "SD0006")
summary_mean_map_warm <- subset(summary_mean_map_warm,ODINsn != "SD0067")
summary_mean_map_warm <- subset(summary_mean_map_warm,ODINsn != "SD0060")
# Assign coordinates to the dataframe
coordinates(summary_mean_map_warm) <- ~ lon + lat
proj4string(summary_mean_map_warm) <- proj4string_latlon
# Get the basemap
centre_lat <- mean(summary_mean_map_warm$lat)
centre_lon <- mean(summary_mean_map_warm$lon)
ca <- get_googlemap(
c(lon=centre_lon,lat=centre_lat),
zoom=15,
scale=2,
color="bw",
key = "AIzaSyACi3pNvPQTxZWx5u0nTtke598dPqdgySg")
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=47.542917,-116.127083&zoom=15&size=640x640&scale=2&maptype=terrain&sensor=false&key=AIzaSyACi3pNvPQTxZWx5u0nTtke598dPqdgySg
ggmap(ca) +
geom_point(data=as.data.frame(summary_mean_map_warm),aes(x=lon,y=lat,colour = PM2.5),size = 5) +
scale_colour_continuous(low="white", high="red",limits=c(0, max(summary_mean_map_warm$PM2.5)),
name = "PM2.5", oob=squish)
ggmap(ca) +
geom_point(data=as.data.frame(summary_mean_map_warm),aes(x=lon,y=lat,colour = PM10),size = 5) +
scale_colour_continuous(low="white", high="red",limits=c(0, max(summary_mean_map_warm$PM10)),
name = "PM10", oob=squish)
ggmap(ca) +
geom_point(data=as.data.frame(summary_mean_map_warm),aes(x=lon,y=lat,colour = PM10 - PM2.5),size = 5) +
scale_colour_continuous(low="white", high="red",limits=c(0, max(summary_mean_map_warm$PM10 - summary_mean_map_warm$PM2.5)),
name = "PMcoarse", oob=squish)
# Save the "all.data" dataframe
save(all.data,file = 'alldata.RData')
save(all.data.tavg,file = 'alldataTAVG.RData')
data.output <- all.data.tavg[,c('date','PM2.5','PM10','Temperature','RH','ODINsn')]