library(magrittr)
library(rgdal)
## Loading required package: sp
## rgdal: version: 1.3-4, (SVN revision 766)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 2.2.3, released 2017/11/20
## Path to GDAL shared files: /usr/share/gdal/2.2
## GDAL binary built with GEOS: TRUE
## Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
## Path to PROJ.4 shared files: (autodetected)
## Linking to sp version: 1.3-1
library(raster)
##
## Attaching package: 'raster'
## The following object is masked from 'package:magrittr':
##
## extract
library(gdalUtils)
library(fst)
library(future.apply)
## Loading required package: future
##
## Attaching package: 'future'
## The following object is masked from 'package:raster':
##
## values
##
## Attaching package: 'future.apply'
## The following object is masked from 'package:future':
##
## future_lapply
library(data.table)
##
## Attaching package: 'data.table'
## The following object is masked from 'package:raster':
##
## shift
library(here)
## here() starts at /home/michael
###################################################################
# Fixed parameters
# Number of processes to run at once
future_workers = 3L
# Variables to keep in final table
to_keep = c(
#"filename",
"date",
"lon",
"lat",
"LST_Day_1km",
"QC_Day",
"Day_view_time",
# "Day_view_angl",
"LST_Night_1km",
"QC_Night",
"Night_view_time",
# "Night_view_angl",
"Emis_31",
"Emis_32"#,
# "Clear_day_cov",
# "Clear_night_cov",
)
# Directory with HDF files, possibly in sub-directories
dir = "/home/michael/Dropbox/BGU/Itai_Kloog/p_55_processing_MODIS_LST_from_HDF_to_table/example_02_mexico/data"
# dir <- here("data")
# Output directory
out = "/home/michael/Dropbox/BGU/Itai_Kloog/p_55_processing_MODIS_LST_from_HDF_to_table/example_02_mexico"
# out <- here("intermediate")
###################################################################
# Start
start = Sys.time()
# Get list of files
files = list.files(path = dir, pattern = "\\.hdf$", recursive = TRUE, full.names = TRUE)
# Function for reading HDF file and turning it into table
read_hdf = function(input_file) {
# Get subdataset paths & variable names
s = get_subdatasets(input_file)
vars = gsub(".*:", "", s)
# Read from HDF to multi-band raster
r = stack()
for(i in s) {
tmp = i %>% readGDAL(silent = TRUE) %>% raster
r = stack(r, tmp)
}
names(r) = vars
# To points
pnt = rasterToPoints(r, spatial = TRUE)
pnt = spTransform(pnt, "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
# To table
dat = as.data.frame(pnt)
# coerce to data.table
setDT(dat)
# Rename 'x-y' to 'lon-lat'
setnames(dat, c("x", "y"), c("lon", "lat"))
# Add metadata
# dat$filename = basename(input_file) # too redundant to store
dat[, day :=
basename(input_file) %>%
strsplit("\\.") %>%
sapply("[", 2) %>%
gsub("A", "", .) %>%
as.Date(format = "%Y%j")] # day is a better name because date conflicts with many R functions
# Subset variables to keep
dat[, setdiff(names(dat), to_keep) := NULL]
# Return
return(dat)
}
# Tell future to use the multiprocess backend
plan(multiprocess, workers = future_workers)
# Send work to the workers
output = future_lapply(files, FUN = read_hdf)
# Combine to one table with dat.table
final = rbindlist(output)
# Write table
write.fst(x = final, path = file.path(out, "final.fst"), compress = 80)
# End
end = Sys.time()
# Overall timing
end - start
## Time difference of 25.17534 secs
# Examine final table
head(final)
## LST_Day_1km QC_Day Day_view_time LST_Night_1km QC_Night Night_view_time
## 1: 303.38 81 10.5 285.98 17 22.4
## 2: 302.92 81 10.5 285.72 17 22.4
## 3: 301.68 81 10.5 285.98 17 22.4
## 4: 301.46 81 10.5 286.54 17 22.4
## 5: 300.96 81 10.5 287.04 17 22.4
## 6: 300.82 81 10.5 286.86 17 22.4
## Emis_31 Emis_32 lon lat
## 1: 0.972 0.976 -115.4604 29.99583
## 2: 0.972 0.976 -115.4508 29.99583
## 3: 0.972 0.976 -115.4412 29.99583
## 4: 0.972 0.976 -115.4315 29.99583
## 5: 0.972 0.976 -115.4219 29.99583
## 6: 0.972 0.976 -115.4123 29.99583
tail(final)
## LST_Day_1km QC_Day Day_view_time LST_Night_1km QC_Night Night_view_time
## 1: NA 3 NA NA 3 NA
## 2: NA 3 NA NA 3 NA
## 3: NA 3 NA NA 3 NA
## 4: NA 3 NA NA 3 NA
## 5: NA 3 NA NA 3 NA
## 6: NA 3 NA NA 3 NA
## Emis_31 Emis_32 lon lat
## 1: NA NA -91.43611 10.00417
## 2: NA NA -91.42765 10.00417
## 3: NA NA -91.41918 10.00417
## 4: NA NA -91.41072 10.00417
## 5: NA NA -91.40226 10.00417
## 6: NA NA -91.39380 10.00417