library(magrittr)
library(rgdal)
## Loading required package: sp
## rgdal: version: 1.3-4, (SVN revision 766)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 2.2.2, released 2017/09/15
## Path to GDAL shared files: /usr/share/gdal/2.2
## GDAL binary built with GEOS: TRUE
## Loaded PROJ.4 runtime: Rel. 4.9.2, 08 September 2015, [PJ_VERSION: 492]
## Path to PROJ.4 shared files: (autodetected)
## Linking to sp version: 1.3-1
library(raster)
##
## Attaching package: 'raster'
## The following object is masked from 'package:magrittr':
##
## extract
library(gdalUtils)
library(fst)
library(future.apply)
## Loading required package: future
##
## Attaching package: 'future'
## The following object is masked from 'package:raster':
##
## values
##
## Attaching package: 'future.apply'
## The following object is masked from 'package:future':
##
## future_lapply
###################################################################
# Fixed parameters
# Number of processes to run at once
future_workers = 3L
# Variables to keep in final table
to_keep = c(
"filename",
"date",
"lon",
"lat",
"LST_Day_1km",
"QC_Day",
"Day_view_time",
# "Day_view_angl",
"LST_Night_1km",
"QC_Night",
"Night_view_time",
# "Night_view_angl",
"Emis_31",
"Emis_32"#,
# "Clear_day_cov",
# "Clear_night_cov",
)
# Directory with HDF files, possibly in sub-directories
dir = "/home/michael/Dropbox/BGU/Itai_Kloog/p_55_processing_MODIS_LST_from_HDF_to_table/example_02_mexico/data"
# Output directory
out = "/home/michael/Dropbox/BGU/Itai_Kloog/p_55_processing_MODIS_LST_from_HDF_to_table/example_02_mexico"
###################################################################
# Start
start = Sys.time()
# Get list of files
files = list.files(path = dir, pattern = "\\.hdf$", recursive = TRUE, full.names = TRUE)
# Function for reading HDF file and turning it into table
read_hdf = function(input_file) {
# Get subdataset paths & variable names
s = get_subdatasets(input_file)
vars = gsub(".*:", "", s)
# Read from HDF to multi-band raster
r = stack()
for(i in s) {
tmp = i %>% readGDAL(silent = TRUE) %>% raster
r = stack(r, tmp)
}
names(r) = vars
# To points
pnt = rasterToPoints(r, spatial = TRUE)
pnt = spTransform(pnt, "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
# To table
dat = as.data.frame(pnt)
# Rename 'x-y' to 'lon-lat'
dat$lon = dat$x; dat$lat = dat$y
dat$x = NULL; dat$y = NULL
# Add metadata
dat$filename = basename(input_file)
dat$date =
basename(input_file) %>%
strsplit("\\.") %>%
sapply("[", 2) %>%
gsub("A", "", .) %>%
as.Date(format = "%Y%j")
# Subset variables to keep
dat = dat[, to_keep]
# Return
return(dat)
}
# Tell future to use the multiprocess backend
plan(multiprocess, workers = future_workers)
# Send work to the workers
output = future_lapply(files, FUN = read_hdf)
# Combine to one table
final = do.call(rbind, output)
# Write table
write.fst(x = final, path = file.path(out, "final.fst"), compress = 80)
# End
end = Sys.time()
# Overall timing
end - start
## Time difference of 20.09542 secs
# Examine final table
head(final)
## filename date lon
## 1 MOD11A1.A2004091.h08v06.006.2015218010534.hdf 2004-03-31 -115.4604
## 2 MOD11A1.A2004091.h08v06.006.2015218010534.hdf 2004-03-31 -115.4508
## 3 MOD11A1.A2004091.h08v06.006.2015218010534.hdf 2004-03-31 -115.4412
## 4 MOD11A1.A2004091.h08v06.006.2015218010534.hdf 2004-03-31 -115.4315
## 5 MOD11A1.A2004091.h08v06.006.2015218010534.hdf 2004-03-31 -115.4219
## 6 MOD11A1.A2004091.h08v06.006.2015218010534.hdf 2004-03-31 -115.4123
## lat LST_Day_1km QC_Day Day_view_time LST_Night_1km QC_Night
## 1 29.99583 303.38 81 10.5 285.98 17
## 2 29.99583 302.92 81 10.5 285.72 17
## 3 29.99583 301.68 81 10.5 285.98 17
## 4 29.99583 301.46 81 10.5 286.54 17
## 5 29.99583 300.96 81 10.5 287.04 17
## 6 29.99583 300.82 81 10.5 286.86 17
## Night_view_time Emis_31 Emis_32
## 1 22.4 0.972 0.976
## 2 22.4 0.972 0.976
## 3 22.4 0.972 0.976
## 4 22.4 0.972 0.976
## 5 22.4 0.972 0.976
## 6 22.4 0.972 0.976
tail(final)
## filename date lon
## 8639995 MOD11A1.A2004093.h08v07.006.2015218024806.hdf 2004-04-02 -91.43611
## 8639996 MOD11A1.A2004093.h08v07.006.2015218024806.hdf 2004-04-02 -91.42765
## 8639997 MOD11A1.A2004093.h08v07.006.2015218024806.hdf 2004-04-02 -91.41918
## 8639998 MOD11A1.A2004093.h08v07.006.2015218024806.hdf 2004-04-02 -91.41072
## 8639999 MOD11A1.A2004093.h08v07.006.2015218024806.hdf 2004-04-02 -91.40226
## 8640000 MOD11A1.A2004093.h08v07.006.2015218024806.hdf 2004-04-02 -91.39380
## lat LST_Day_1km QC_Day Day_view_time LST_Night_1km QC_Night
## 8639995 10.00417 NA 3 NA NA 3
## 8639996 10.00417 NA 3 NA NA 3
## 8639997 10.00417 NA 3 NA NA 3
## 8639998 10.00417 NA 3 NA NA 3
## 8639999 10.00417 NA 3 NA NA 3
## 8640000 10.00417 NA 3 NA NA 3
## Night_view_time Emis_31 Emis_32
## 8639995 NA NA NA
## 8639996 NA NA NA
## 8639997 NA NA NA
## 8639998 NA NA NA
## 8639999 NA NA NA
## 8640000 NA NA NA