library(magrittr)
library(rgdal)
## Loading required package: sp
## rgdal: version: 1.3-4, (SVN revision 766)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 2.2.2, released 2017/09/15
##  Path to GDAL shared files: /usr/share/gdal/2.2
##  GDAL binary built with GEOS: TRUE 
##  Loaded PROJ.4 runtime: Rel. 4.9.2, 08 September 2015, [PJ_VERSION: 492]
##  Path to PROJ.4 shared files: (autodetected)
##  Linking to sp version: 1.3-1
library(raster)
## 
## Attaching package: 'raster'
## The following object is masked from 'package:magrittr':
## 
##     extract
library(gdalUtils)
library(fst)
library(future.apply)
## Loading required package: future
## 
## Attaching package: 'future'
## The following object is masked from 'package:raster':
## 
##     values
## 
## Attaching package: 'future.apply'
## The following object is masked from 'package:future':
## 
##     future_lapply
###################################################################
# Fixed parameters

# Number of processes to run at once
future_workers = 3L 

# Variables to keep in final table
to_keep = c(
  "filename", 
  "date",
  "lon",
  "lat",
  "LST_Day_1km", 
  "QC_Day", 
  "Day_view_time", 
  # "Day_view_angl", 
  "LST_Night_1km", 
  "QC_Night", 
  "Night_view_time", 
  # "Night_view_angl", 
  "Emis_31", 
  "Emis_32"#, 
  # "Clear_day_cov", 
  # "Clear_night_cov",
)

# Directory with HDF files, possibly in sub-directories
dir = "/home/michael/Dropbox/BGU/Itai_Kloog/p_55_processing_MODIS_LST_from_HDF_to_table/example_02_mexico/data"

# Output directory
out = "/home/michael/Dropbox/BGU/Itai_Kloog/p_55_processing_MODIS_LST_from_HDF_to_table/example_02_mexico"

###################################################################

# Start
start = Sys.time()

# Get list of files
files = list.files(path = dir, pattern = "\\.hdf$", recursive = TRUE, full.names = TRUE)

# Function for reading HDF file and turning it into table
read_hdf = function(input_file) {
  
  # Get subdataset paths & variable names
  s = get_subdatasets(input_file)
  vars = gsub(".*:", "", s)
  
  # Read from HDF to multi-band raster
  r = stack()
  for(i in s) {
    tmp = i %>% readGDAL(silent = TRUE) %>% raster
    r = stack(r, tmp)
  }
  names(r) = vars
  
  # To points
  pnt = rasterToPoints(r, spatial = TRUE)
  pnt = spTransform(pnt, "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
  
  # To table
  dat = as.data.frame(pnt)
  
  # Rename 'x-y' to 'lon-lat'
  dat$lon = dat$x; dat$lat = dat$y
  dat$x = NULL; dat$y = NULL
  
  # Add metadata
  dat$filename = basename(input_file)
  dat$date = 
    basename(input_file) %>% 
    strsplit("\\.") %>% 
    sapply("[", 2) %>% 
    gsub("A", "", .) %>% 
    as.Date(format = "%Y%j")
  
  # Subset variables to keep
  dat = dat[, to_keep]
  
  # Return
  return(dat)
  
}

# Tell future to use the multiprocess backend
plan(multiprocess, workers = future_workers) 

# Send work to the workers
output = future_lapply(files, FUN = read_hdf) 

# Combine to one table
final = do.call(rbind, output)

# Write table
write.fst(x = final, path = file.path(out, "final.fst"), compress = 80)

# End
end = Sys.time()

# Overall timing
end - start
## Time difference of 20.09542 secs
# Examine final table
head(final)
##                                        filename       date       lon
## 1 MOD11A1.A2004091.h08v06.006.2015218010534.hdf 2004-03-31 -115.4604
## 2 MOD11A1.A2004091.h08v06.006.2015218010534.hdf 2004-03-31 -115.4508
## 3 MOD11A1.A2004091.h08v06.006.2015218010534.hdf 2004-03-31 -115.4412
## 4 MOD11A1.A2004091.h08v06.006.2015218010534.hdf 2004-03-31 -115.4315
## 5 MOD11A1.A2004091.h08v06.006.2015218010534.hdf 2004-03-31 -115.4219
## 6 MOD11A1.A2004091.h08v06.006.2015218010534.hdf 2004-03-31 -115.4123
##        lat LST_Day_1km QC_Day Day_view_time LST_Night_1km QC_Night
## 1 29.99583      303.38     81          10.5        285.98       17
## 2 29.99583      302.92     81          10.5        285.72       17
## 3 29.99583      301.68     81          10.5        285.98       17
## 4 29.99583      301.46     81          10.5        286.54       17
## 5 29.99583      300.96     81          10.5        287.04       17
## 6 29.99583      300.82     81          10.5        286.86       17
##   Night_view_time Emis_31 Emis_32
## 1            22.4   0.972   0.976
## 2            22.4   0.972   0.976
## 3            22.4   0.972   0.976
## 4            22.4   0.972   0.976
## 5            22.4   0.972   0.976
## 6            22.4   0.972   0.976
tail(final)
##                                              filename       date       lon
## 8639995 MOD11A1.A2004093.h08v07.006.2015218024806.hdf 2004-04-02 -91.43611
## 8639996 MOD11A1.A2004093.h08v07.006.2015218024806.hdf 2004-04-02 -91.42765
## 8639997 MOD11A1.A2004093.h08v07.006.2015218024806.hdf 2004-04-02 -91.41918
## 8639998 MOD11A1.A2004093.h08v07.006.2015218024806.hdf 2004-04-02 -91.41072
## 8639999 MOD11A1.A2004093.h08v07.006.2015218024806.hdf 2004-04-02 -91.40226
## 8640000 MOD11A1.A2004093.h08v07.006.2015218024806.hdf 2004-04-02 -91.39380
##              lat LST_Day_1km QC_Day Day_view_time LST_Night_1km QC_Night
## 8639995 10.00417          NA      3            NA            NA        3
## 8639996 10.00417          NA      3            NA            NA        3
## 8639997 10.00417          NA      3            NA            NA        3
## 8639998 10.00417          NA      3            NA            NA        3
## 8639999 10.00417          NA      3            NA            NA        3
## 8640000 10.00417          NA      3            NA            NA        3
##         Night_view_time Emis_31 Emis_32
## 8639995              NA      NA      NA
## 8639996              NA      NA      NA
## 8639997              NA      NA      NA
## 8639998              NA      NA      NA
## 8639999              NA      NA      NA
## 8640000              NA      NA      NA