1. Introduction

This notebook illustrates regression of soil moisture from multispectral & thermal imagery obtained in a field campaign over a potato crop farm. Its main purpose is to guide data processing tasks for the HERMES project: Agro-Geoinformática: Detección temprana in situ de enfermedades a nivel foliar empleando imágenes espectrales. Please cite this work at the beginning of your notebook.

Let’s start by cleaning memory and loading the required libraries:

rm(list=ls())
library(sf)
library(stars)
library(leaflet)
library(leafem)
library(dplyr)
library(raster)
library(RColorBrewer)
library(randomForest)
library(rfUtilities)
library(tidyr)

2. Reading input data

Let’s read a shapefile representing the study area:

(aoi <- st_read("C:\\datos\\crops\\tenjo\\muestreo\\area_estudio.shp"))
## Reading layer `area_estudio' from data source 
##   `C:\datos\crops\tenjo\muestreo\area_estudio.shp' using driver `ESRI Shapefile'
## Simple feature collection with 1 feature and 1 field
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 990223.4 ymin: 1027816 xmax: 990363.4 ymax: 1027945
## CRS:           NA

Note that the CRS has not been defined yet.

Now, let’s read a file containing 60 sampling points where soil moisture was measured:

(ptos <- st_read("../crops/tenjo/muestreo/puntos_13_03.shp"))
## Reading layer `puntos_13_03' from data source 
##   `C:\datos\crops\tenjo\muestreo\puntos_13_03.shp' using driver `ESRI Shapefile'
## Simple feature collection with 60 features and 19 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 990235 ymin: 1027825 xmax: 990357.3 ymax: 1027938
## Projected CRS: MAGNA-SIRGAS / Colombia Bogota zone

Now, let’s check the values of soil moisture in the top 5 cm as measured by a time-domain reflectometer (TDR) device. This property is stored in the attribute T of ptos:

var = "T"
ptos[var]
hist(ptos$T)

Let’s visualize the samples:

getColor <- function(ptos) {
  sapply(ptos$T, function(T) {
  if(T >= 50) {
    "blue"
  } else if(T < 10) {
    "orange"
  } else {
    "green"
  } })
}

icons <- awesomeIcons(
  icon = 'ios-close',
  iconColor = 'black',
  library = 'ion',
  markerColor = getColor(ptos)
)

leaflet(data = ptos, options = leafletOptions(maxZoom=30))  %>% addTiles() %>%
  addAwesomeMarkers(~W, ~N, icon=icons, popup = ~as.character(T), label = ~as.character(T))

It is important to select only relevant attributes:

dplyr::select(ptos, Punto, T) -> nptos
nptos

We also need the multispectral & thermal imagery for the study area.

list.files(("../crops/tenjo/marzo13/output/"))
##  [1] "DemVuelo1_4cm.tif"             "DemVuelo2_4cm.tif"            
##  [3] "MosaicoV1.tif"                 "MosaicoV1_lote.tif"           
##  [5] "MosaicoVuelo1_4cm.tif.aux.xml" "MosaicoVuelo2_4cm.tif"        
##  [7] "NubePuntosVuelo1.las"          "NubePuntosVuelo2.las"         
##  [9] "SM_IDW.tif"                    "SM_OK.tif"                    
## [11] "V1_B6.tif"                     "V1_B6_aoi.tif"                
## [13] "V1_B6_lote.tif"                "Vuelo1.pdf"                   
## [15] "Vuelo2.pdf"

The image comprises five optical bands and one thermal band collected using an Altum camera from Micasense:

bands = c("Blue", "Green", "Red", "RedEdge", "NIR", "Thermal")
x = read_stars("../crops/tenjo/marzo13/output/MosaicoV1_lote.tif", RasterIO = list(bands))

Let’s check what is x:

plot(x, axes = TRUE)
## downsample set to c(7,7,1)

(x.spl = split(x, "band"))
## stars object with 2 dimensions and 6 attributes
## attribute(s), summary of first 1e+05 cells:
##                Min.     1st Qu.      Median        Mean     3rd Qu.        Max.
## X1 [m]  0.009348963  0.01262100  0.01394725  0.01792325  0.01633260  0.08202808
## X2 [m]  0.007785690  0.01121722  0.01277314  0.02509133  0.01598043  0.19189647
## X3 [m]  0.010001210  0.01699185  0.01887484  0.02189363  0.02203600  0.06591093
## X4 [m]  0.011644279  0.01857926  0.02126069  0.05011741  0.02968097  0.36176619
## X5 [m]  0.015311529  0.02503854  0.02927613  0.08499635  0.04497424  0.73622215
## X6 [m] 15.567840576 17.13777971 17.49541950 17.34348364 17.69926691 18.15388489
##         NA's
## X1 [m] 99060
## X2 [m] 99060
## X3 [m] 99060
## X4 [m] 99060
## X5 [m] 99060
## X6 [m] 99060
## dimension(s):
##   from   to  offset delta                       refsys point values x/y
## x    1 3497  990223  0.04 MAGNA-SIRGAS / Colombia B... FALSE   NULL [x]
## y    1 3227 1027945 -0.04 MAGNA-SIRGAS / Colombia B... FALSE   NULL [y]
names(x.spl) <- bands
x.spl
## stars object with 2 dimensions and 6 attributes
## attribute(s), summary of first 1e+05 cells:
##                     Min.     1st Qu.      Median        Mean     3rd Qu.
## Blue [m]     0.009348963  0.01262100  0.01394725  0.01792325  0.01633260
## Green [m]    0.007785690  0.01121722  0.01277314  0.02509133  0.01598043
## Red [m]      0.010001210  0.01699185  0.01887484  0.02189363  0.02203600
## RedEdge [m]  0.011644279  0.01857926  0.02126069  0.05011741  0.02968097
## NIR [m]      0.015311529  0.02503854  0.02927613  0.08499635  0.04497424
## Thermal [m] 15.567840576 17.13777971 17.49541950 17.34348364 17.69926691
##                    Max.  NA's
## Blue [m]     0.08202808 99060
## Green [m]    0.19189647 99060
## Red [m]      0.06591093 99060
## RedEdge [m]  0.36176619 99060
## NIR [m]      0.73622215 99060
## Thermal [m] 18.15388489 99060
## dimension(s):
##   from   to  offset delta                       refsys point values x/y
## x    1 3497  990223  0.04 MAGNA-SIRGAS / Colombia B... FALSE   NULL [x]
## y    1 3227 1027945 -0.04 MAGNA-SIRGAS / Colombia B... FALSE   NULL [y]

We plot the new image:

plot(x.spl[1], breaks = "equal", key.pos = 4)
## downsample set to c(2,2)

#thermal band
x %>% slice(band, 6) -> x6
x6
## stars object with 2 dimensions and 1 attribute
## attribute(s), summary of first 1e+05 cells:
##                            Min.  1st Qu.   Median     Mean  3rd Qu.     Max.
## MosaicoV1_lote.tif [m] 15.56784 17.13778 17.49542 17.34348 17.69927 18.15388
##                         NA's
## MosaicoV1_lote.tif [m] 99060
## dimension(s):
##   from   to  offset delta                       refsys point values x/y
## x    1 3497  990223  0.04 MAGNA-SIRGAS / Colombia B... FALSE   NULL [x]
## y    1 3227 1027945 -0.04 MAGNA-SIRGAS / Colombia B... FALSE   NULL [y]
hist(x6)

Note that, for band 6, digital numbers represent temperature in Celsius degrees.

Now, the near-infrared band (nir):

#nir band
x %>% slice(band, 5) -> x5
x5
## stars object with 2 dimensions and 1 attribute
## attribute(s), summary of first 1e+05 cells:
##                              Min.    1st Qu.     Median       Mean    3rd Qu.
## MosaicoV1_lote.tif [m] 0.01531153 0.02503854 0.02927613 0.08499635 0.04497424
##                             Max.  NA's
## MosaicoV1_lote.tif [m] 0.7362221 99060
## dimension(s):
##   from   to  offset delta                       refsys point values x/y
## x    1 3497  990223  0.04 MAGNA-SIRGAS / Colombia B... FALSE   NULL [x]
## y    1 3227 1027945 -0.04 MAGNA-SIRGAS / Colombia B... FALSE   NULL [y]
hist(x5)

In the nir band, digital numbers represent surface reflectance.

Now, the red-edge band:

#red-edge band
x %>% slice(band, 4) -> x4
x4
## stars object with 2 dimensions and 1 attribute
## attribute(s), summary of first 1e+05 cells:
##                              Min.    1st Qu.     Median       Mean    3rd Qu.
## MosaicoV1_lote.tif [m] 0.01164428 0.01857926 0.02126069 0.05011741 0.02968097
##                             Max.  NA's
## MosaicoV1_lote.tif [m] 0.3617662 99060
## dimension(s):
##   from   to  offset delta                       refsys point values x/y
## x    1 3497  990223  0.04 MAGNA-SIRGAS / Colombia B... FALSE   NULL [x]
## y    1 3227 1027945 -0.04 MAGNA-SIRGAS / Colombia B... FALSE   NULL [y]
hist(x4)

Let’s visualize both the thermal band and the measured soil moisture:

leaflet(data=ptos, options = leafletOptions(maxZoom=30)) %>%
  addProviderTiles("OpenStreetMap") %>%
  addStarsImage(x, band=6, project = FALSE) %>% 
  addAwesomeMarkers(~W, ~N, icon=icons, popup = ~as.character(T), label = ~as.character(T))

4. Regression of soil moisture

First, we need to extract the imagery values for the sampled points:

st_extract(x.spl, nptos) %>% st_as_sf() -> datasample
datasample
plot(datasample)

Now, we add soil moisture values to the datasample object:

datasample$sm = nptos$T # no need for join, since the order did not change
datasample

We need to convert the sf object into a dataframe:

(datasample = as.data.frame(datasample))
head(datasample)

It is important to remove NA values:

datasample %>% drop_na() -> nsample
nsample

Note that four sampled points were removed.

We need also to remove the geometry:

nsample$geometry = NULL 

Now, we create a non-spatial random forest model to estimate soil moisture:

sm.rf = randomForest(sm ~ ., nsample, importance = TRUE) # 

Note that this a basic model. You may be interested in reading this post and trying a different model.

Let´s check the model:

sm.rf
## 
## Call:
##  randomForest(formula = sm ~ ., data = nsample, importance = TRUE) 
##                Type of random forest: regression
##                      Number of trees: 500
## No. of variables tried at each split: 2
## 
##           Mean of squared residuals: 210.4556
##                     % Var explained: 4.85

We want to know importance of variables:

varImpPlot(sm.rf)

It is convenient to conduct a permutation test cross-validation for the regression model:

# For regression
( rf.cv <- rf.crossValidation(sm.rf, nsample[,1:6], 
                              p=0.10, n=99, ntree=501))
## running: regression cross-validation with 99 iterations
## Fit MSE = 219.3659 
## Fit percent variance explained = 4.85 
## Median permuted MSE = 219.8803 
## Median permuted percent variance explained = 2.92 
## Median cross-validation RMSE = 13.22084 
## Median cross-validation MBE = 1.221005 
## Median cross-validation MAE = 10.8934 
## Range of ks p-values = 4.286694e-05 0.9845679 
## Range of ks D statistic = 0.1666667 0.8333333 
## RMSE cross-validation error variance = 14.39701 
## MBE cross-validation error variance = 38.742 
## MAE cross-validation error variance = 9.307109
par(mfrow=c(2,2))
   plot(rf.cv)  
   plot(rf.cv, stat = "mse")
   plot(rf.cv, stat = "var.exp")
plot(rf.cv, stat = "mae")

Do not forget to include the reported MSE and RSME values in your report. See this link for interpreting the output of cross validation tests.

Now, the prediction task:

pr = predict(x.spl, sm.rf)
pr
## stars object with 2 dimensions and 1 attribute
## attribute(s), summary of first 1e+05 cells:
##                 Min. 1st Qu.   Median     Mean  3rd Qu.     Max.  NA's
## prediction  13.46042 14.3295 14.46363 18.57953 23.89528 42.70075 99060
## dimension(s):
##   from   to  offset delta                       refsys point values x/y
## x    1 3497  990223  0.04 MAGNA-SIRGAS / Colombia B... FALSE   NULL [x]
## y    1 3227 1027945 -0.04 MAGNA-SIRGAS / Colombia B... FALSE   NULL [y]
leaflet(data=ptos, options = leafletOptions(maxZoom=30)) %>%
  addProviderTiles("OpenStreetMap") %>%
  addStarsImage(pr, band=1, project = FALSE, maxBytes = 10000000) %>% 
  addAwesomeMarkers(~W, ~N, icon=icons, popup = ~as.character(T), label = ~as.character(T))

Save the estimated soil moisture:

write_stars(pr, "SM_RandomForest_13_03.tif")

5. Further tasks

You may be interested in improving the regression model. Several suggestions follow:

6. Reproducibility

If you reuse this code for your research please cite as: Lizarazo, I., 2021. Exploring Random Forest-based soil moisture regression from UAV images. Available at https://rpubs.com/ials2un/erfreg.

sessionInfo()
## R version 4.0.5 (2021-03-31)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19042)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=Spanish_Colombia.1252  LC_CTYPE=Spanish_Colombia.1252   
## [3] LC_MONETARY=Spanish_Colombia.1252 LC_NUMERIC=C                     
## [5] LC_TIME=Spanish_Colombia.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] tidyr_1.1.3         rfUtilities_2.1-5   randomForest_4.6-14
##  [4] RColorBrewer_1.1-2  raster_3.4-13       sp_1.4-5           
##  [7] dplyr_1.0.7         leafem_0.1.6        leaflet_2.0.4.1    
## [10] stars_0.5-3         abind_1.4-5         sf_1.0-2           
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.1.1        xfun_0.22               bslib_0.2.5.1          
##  [4] purrr_0.3.4             lattice_0.20-41         colorspace_2.0-2       
##  [7] leaflet.providers_1.9.0 vctrs_0.3.8             generics_0.1.0         
## [10] htmltools_0.5.1.1       yaml_2.2.1              base64enc_0.1-3        
## [13] utf8_1.2.2              rlang_0.4.10            e1071_1.7-8            
## [16] jquerylib_0.1.4         pillar_1.6.2            glue_1.4.2             
## [19] DBI_1.1.1               lifecycle_1.0.0         stringr_1.4.0          
## [22] munsell_0.5.0           htmlwidgets_1.5.3       codetools_0.2-18       
## [25] evaluate_0.14           knitr_1.33              crosstalk_1.1.1        
## [28] parallel_4.0.5          class_7.3-18            fansi_0.5.0            
## [31] highr_0.9               Rcpp_1.0.7              KernSmooth_2.23-18     
## [34] scales_1.1.1            classInt_0.4-3          lwgeom_0.2-7           
## [37] jsonlite_1.7.2          farver_2.1.0            png_0.1-7              
## [40] digest_0.6.27           stringi_1.7.4           grid_4.0.5             
## [43] tools_4.0.5             magrittr_2.0.1          sass_0.4.0             
## [46] proxy_0.4-26            tibble_3.1.4            crayon_1.4.1           
## [49] pkgconfig_2.0.3         ellipsis_0.3.2          rmarkdown_2.10         
## [52] R6_2.5.1                units_0.7-2             compiler_4.0.5