In this short project, I model the remaining potential suitable habitat for the black rhino Diceros bicornis in Tanzania and Kenya.
occ <- gbif('Diceros', 'bicornis', ntries = 5)
occ_clean <- occ |> dplyr::select(lon, lat, basisOfRecord, occurrenceStatus) |> filter(basisOfRecord %in% c('HUMAN_OBSERVATION', 'PRESERVED_SPECIMEN')) |>
filter(occurrenceStatus == 'PRESENT') |>
mutate(species = 1) |>
dplyr::select(lon, lat, species) |>
unique() |>
drop_na()
coordinates(occ_clean) <- ~lon+lat
Getting boundary data for Kenya and Tanzania and aggregating them to a single polygon.
KEN <- raster::getData('GADM', country = 'KEN', level = 0)
TZA <- raster::getData('GADM', country = 'TZA', level = 0)
g <- raster::bind(KEN, TZA)
both_countries <- raster::aggregate(g)
Getting climate data
clim <- raster::getData('worldclim', var = 'bio', res = 10)
clim_crop <- mask(crop(clim, both_countries), both_countries)
plot(clim_crop[[2]])
Cropping occurrence records only for the study area boundary.
occ_crop <- crop(occ_clean, both_countries)
plot(clim_crop[[2]])
points(occ_crop, col = 'red')
Leaving out highly correlated predictor variables
ext <- raster::extract(clim_crop, occ_crop)
v <- usdm::vifstep(ext)
clim_used <- usdm::exclude(clim_crop, v)
clim_used
## class : RasterBrick
## dimensions : 100, 76, 7600, 8 (nrow, ncol, ncell, nlayers)
## resolution : 0.1666667, 0.1666667 (x, y)
## extent : 29.33333, 42, -11.66667, 5 (xmin, xmax, ymin, ymax)
## crs : +proj=longlat +datum=WGS84 +no_defs
## source : memory
## names : bio4, bio7, bio8, bio13, bio14, bio15, bio18, bio19
## min values : 201, 92, 61, 45, 0, 30, 26, 0
## max values : 2101, 217, 299, 667, 84, 133, 769, 591
Preparing the sdmData object.
d <- sdmData(species~., train = occ_crop, predictors = clim_used, bg = list(method = 'gRandom', n = 500))
d
## class : sdmdata
## ===========================================================
## number of species : 1
## species names : species
## number of features : 8
## feature names : bio4, bio7, bio8, ...
## type : Presence-Background
## has independet test data? : FALSE
## number of records : 757
## has Coordinates? : TRUE
## Building the sdm model
Building the model using random forest, support vector machine, and general linear model. I also use both bootstrapping and sub-sampling. Test percentage is 30% of the dataset leaving 70% for training the model.
model <- sdm(species~., d, methods = c('rf', 'svm', 'glm'), replications = c('boot', 'sub'), test.p = 30, n =3)
model
## class : sdmModels
## ========================================================
## number of species : 1
## number of modelling methods : 3
## names of modelling methods : rf, svm, glm
## replicate.methods (data partitioning) : bootstrap,subsampling
## number of replicates (each method) : 3
## toral number of replicates per model : 6 (per species)
## test percentage (in subsampling) : 30
## ------------------------------------------
## model run success percentage (per species) :
## ------------------------------------------
## method species
## ----------------------
## rf : 100 %
## svm : 100 %
## glm : 100 %
##
## ###################################################################
## model Mean performance (per species), using test dataset (generated using partitioning):
## -------------------------------------------------------------------------------
##
## ## species : species
## =========================
##
## methods : AUC | COR | TSS | Deviance
## -------------------------------------------------------------------------
## rf : 0.97 | 0.87 | 0.86 | 0.4
## svm : 0.95 | 0.84 | 0.84 | 0.49
## glm : 0.93 | 0.78 | 0.82 | 0.63
Here is the prediction and ensemble model output for policy considerations.
pred <- predict(model, clim_used)
ens <- ensemble(model, clim_used, setting = list(method = 'weighted', stat = 'tss', opt = 2))
plot(ens, main = 'Potential Suitable Habitats for Black Rhino in Kenya and Tanzania')