This algorithm is basically the dissever algorithm by Malone et al. (2012) and Roudier et al. (2017). The only difference is that it is meant for use with Google Earth Engine and therefore, utilizes parallel processing, more available resources and is meant for large geographic areas. It also takes the median of the predictions which will equal the mean but with soil data the median is a much more robust estimate (if you have to transform, back transforming revolves around the median). However, the downside is there are more options in the caret package (used now with dissever) that GEE does not offer as of yet in their free version.

Load libraries

#Load libraries needed
library(rafikisol)
library(rgee)
ee_Initialize()
## ── rgee 1.1.5 ─────────────────────────────────────── earthengine-api 0.1.356 ── 
##  ✔ user: not_defined
##  ✔ Initializing Google Earth Engine:
 ✔ Initializing Google Earth Engine:  DONE!
## 
 ✔ Earth Engine account: users/trevanflynn 
## ────────────────────────────────────────────────────────────────────────────────

We now set labels, scale for evaluation and where we want to predict, validation data.

#set label of response and parameters
label = "clay" #response
scale = 30 #resolution of landsat
country = "Rwanda" #which area to predict over

#Load country FeatureCollection (bounderies)
worldcountries = ee$FeatureCollection("FAO/GAUL_SIMPLIFIED_500m/2015/level1");
filterCountry = ee$Filter$eq('ADM0_NAME', country);
region = worldcountries$filter(filterCountry);

#get ground truth
validation = ee$FeatureCollection("projects/rafkisol/assets/Rwanda_clay_30cm")$
  filterBounds(region) #make sure its in the area.

#Load soil grids and aggregate clay over 
clay = ee$Image("projects/soilgrids-isric/clay_mean")$
  select(c(0:2))$ #select depths to 30 cm
  reduce("mean")$#take mean
  divide(10)$ #transformation factor to get to percent.
  rename(label) #name correctly

Get landsat images and take the median for 2021 (or whatever time frame you think is best or time series).

#get a water mask
waterMask = ee$Image('UMD/hansen/global_forest_change_2016_v1_4')$
  select('datamask')$neq(1)$eq(0)

#Landsat data filtering of year
dataset = ee$ImageCollection('LANDSAT/LC08/C01/T1_SR')$ #surface reflectance image
  select(ee$List$sequence(0,10))$
  filterDate('2021-01-01', '2021-12-31')$ #dates
  filterBounds(region) #filter region
                 
#Get bands and indices (take median) - probably easier in an apply function or something in R.
blue =  dataset$select("B2")$median()
green = dataset$select("B3")$median()
red = dataset$select("B4")$median()
nir = dataset$select("B5")$median()
swir = dataset$select("B6")$median()
ndvi = nir$subtract(red)$divide(nir$add(red))
ndwi = nir$subtract(swir)$divide(nir$add(swir))
bi = (red$pow(2)$multiply(blue$pow(2))$multiply(green$pow(2)))$
    divide(ee$Number(3)$sqrt())
ci = green$subtract(red)$divide(green$add(red))
ri = (red$pow(2))$divide(blue$multiply(green)$pow(3))
si = red$subtract(blue)$divide(red$add(blue))

#label and combine into ImageCollection (with texture)
#labels
labs = list("Blue", "Green", "Red", "NIR", "SWIR", "NDVI", "NDWI", "BI", "CI", "RI","SI")

#combine images
lst = ee$ImageCollection(c(blue, green, red, nir, swir, ndvi, ndwi, bi, ci, ri, si))$
  toBands()$ #to image
  clip(region)$ #clip to country
  rename(labs)#add coordinates

#lets add climate covariates
coImg = lst$addBands(ee$Image$pixelLonLat())$ #add coords
  addBands(ee$Image("WORLDCLIM/V1/BIO"))$#add some climate
  updateMask(waterMask)$ #apply water mask
  reproject(crs = "EPSG:3857", scale = scale)$
  resample("bilinear")#get on proper resolution

Should always just check by seeing band names and plot.

#check band names
coImg$bandNames()$getInfo()
##  [1] "Blue"      "Green"     "Red"       "NIR"       "SWIR"      "NDVI"     
##  [7] "NDWI"      "BI"        "CI"        "RI"        "SI"        "longitude"
## [13] "latitude"  "bio01"     "bio02"     "bio03"     "bio04"     "bio05"    
## [19] "bio06"     "bio07"     "bio08"     "bio09"     "bio10"     "bio11"    
## [25] "bio12"     "bio13"     "bio14"     "bio15"     "bio16"     "bio17"    
## [31] "bio18"     "bio19"
#mins
mn = coImg$reduceRegion(reducer = 'min', geometry = region, 
                     maxPixels = 1e13, scale = 30)

#max
mx = coImg$reduceRegion(reducer = 'max', geometry = region, 
                     maxPixels = 1e13, scale = 30)

#check to see it plots
Map$centerObject(region, 8)#set where and zoom level

visNDVI = list(min = -1,max = 1, palette = c("darkgreen", "green", "yellow"))
Map$addLayer(coImg$select("NDVI"), visNDVI) + Map$addLegend(visNDVI, name = "NDVI", labFormat = leaflet::labelFormat(transform = function(x) sort(x, decreasing = TRUE)))

Now we have the rafikReg() function for downscaling with Monte Carlo. We use the median and not mean however, because of many reasons but will be the same normally. The algorithm, although fast may take some time to render. The inputs are:

covar = covariates to use.
response = image of what is wanted to be downscaled.
model = which ee model should be used for predictions.
stype = either stratified ("s") or random ("r").
scale = defaults to scale of covariates but can be used to evaluate at different scales.
sampNum = how many samples for each bootstrap.
boot = number of bootstraps to perform (defaults to 10).
... = parameters for the ee model of choice.
#apply function
preds = rafikReg(covar = coImg, response = clay, stype = "s",boot = 5, 
                 model = "gbt", sampNum = 10, numberOfTrees = 5)

Now we can map to see the difference between the two maps. The predictions will take a little bit to render.

#get min and max
#min
mn =preds[[1]]$reduceRegion(reducer = 'min', geometry = region, 
                             maxPixels = 1e13, scale = scale)
#max
mx = preds[[1]]$reduceRegion(reducer = 'max', geometry = region, 
                             maxPixels = 1e13, scale = scale)

#for a legend must use get info
visClay = list(min = mn$getNumber('median')$getInfo(), max = mx$getNumber("median")$getInfo(),
                  palette = c('blue', 'red', 'yellow'))
#make plot
Map$addLayer(preds[[1]], visClay)+ Map$addLegend(visClay, name = "Clay (%)", labFormat = leaflet::labelFormat(transform = function(x) sort(x, decreasing = TRUE)))

Lets see how it compares to the original map.

#plot
Map$addLayer(clay$clip(region), visClay)+ Map$addLegend(visClay, name = "Clay (%)", labFormat = leaflet::labelFormat(transform = function(x) sort(x, decreasing = TRUE)))

Both maps show abnormalities. To get a visual of the uncertainties we run the certCalc() function to calculate the prediction range, variance, standard deviation or standard error from the bootstraps. The function defaults to the prediction range giving the range, upper and lower confidence limits.

The inputs for certCalc() are:

models = results from regScale()
uncert = the uncertainty statistic (prediction range "range", variance "var" or standard deviation "stdev").
inter = the interval to multiply for the standard error (only for range) defaults to 0.95.
#apply function to produce an image
certImg = certCalc(models = preds, uncert = 'range')

This will give 90% confidence of the range of predictions fall in. We want as small as possible.

#get min and max
mn = certImg$reduceRegion(reducer = 'min', geometry = region, 
                             maxPixels = 1e13, scale = scale)
mx =certImg$reduceRegion(reducer = 'max', geometry = region, 
                             maxPixels = 1e13, scale = scale)


visRang = list(min = mn$getNumber('range')$getInfo(), max = mx$getNumber("range")$getInfo(),
                  palette = c("blue", "red", "yellow"))

#plot
Map$addLayer(certImg$select("range"), visRang) + Map$addLegend(visRang, name = "range (%)",labFormat = leaflet::labelFormat(transform = function(x) sort(x, decreasing = TRUE))) 

REFERENCES

Brendan P. Malone, Alex B. McBratney, Budiman Minasny, Ichsani Wheeler, (2012). A general method for downscaling earth resource information. Computers & Geosciences, Volume 41, Pages 119-125. https://doi.org/10.1016/j.cageo.2011.08.021.

P. Roudier, B.P. Malone, C.B. Hedley, B. Minasny, A.B. McBratney, (2017). Comparison of regression methods for spatial downscaling of soil organic carbon stocks maps. Computers and Electronics in Agriculture. Volume 142, Part A, Pages 91-100. https://doi.org/10.1016/j.compag.2017.08.021.

B.P. Malone, A.B. McBratney, B. Minasny, (2011). Empirical estimates of uncertainty for mapping continuous depth functions of soil attributes. Geoderma, Volume 160, Issues 3–4, Pages 614-626. https://doi.org/10.1016/j.geoderma.2010.11.013.

Safanelli, J.L.; Poppiel, R.R.; Ruiz, L.F.C.; Bonfatti, B.R.; Mello, F.A.O.; Rizzo, R.; Demattê, J.A.M. Terrain Analysis in Google Earth Engine: A Method Adapted for High-Performance Global-Scale Analysis. ISPRS Int. J. Geo-Inf. 2020, 9, 400. DOI: https://doi.org/10.3390/ijgi9060400