Package installation

Wallace uses the following R packages that must be installed and loaded before starting.

library(spocc)
library(spThin)
library(dismo)
library(maxnet)
library(rgeos)
library(ENMeval)
library(dplyr)

library(mapview)
library(sf)
library(readxl)
library(readr)

library(knitr)
library(rmarkdown)
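
The modeling steps below also call two helper functions, predictMaxnet() and thresh(), which Wallace defines in its generated R Markdown (recent versions of the wallace package also export them). If you run this code on its own, the following minimal sketches should behave equivalently; the p10 rounding rule is an assumption:

# Sketch of predictMaxnet(): predict a fitted maxnet model onto a raster stack.
# Assumes NA cells are aligned across layers (true here, as the stack is masked).
predictMaxnet <- function(mod, envs, clamp = TRUE, type = "cloglog") {
  envs.pts <- raster::rasterToPoints(envs)  # xy coordinates plus predictor values
  vals <- predict(mod, as.data.frame(envs.pts[, -(1:2)]), type = type, clamp = clamp)
  raster::rasterFromXYZ(cbind(envs.pts[, 1:2], as.numeric(vals)),
                        res = raster::res(envs), crs = raster::crs(envs))
}

# Sketch of thresh(): "mtp" = minimum training presence,
# "p10" = 10-percentile training presence
thresh <- function(modOccVals, type) {
  if (type == "mtp") {
    return(min(stats::na.omit(modOccVals)))
  }
  if (type == "p10") {
    n90 <- ceiling(length(modOccVals) * 0.9)  # assumed rounding rule
    return(rev(sort(modOccVals))[n90])
  }
  stop("unknown threshold type")
}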

Load and See Data

Niche model for *Promops nasutus* using Wallace

Record of analysis for *Promops*.

User CSV path

# NOTE: provide the path to the folder that contains the CSV file
d.occs <- 'D:/BoxFiles/Box Sync/CodigoR/Unillanos-Bat2019/data/'
# create path to user occurrences csv file
userOccs.path <- file.path(d.occs, "Puntos_Promops_nasutus.csv")
# read in csv
userOccs.csv <- read.csv(userOccs.path, header = TRUE)
# remove rows with duplicate coordinates
occs.dups <- duplicated(userOccs.csv[c('longitude', 'latitude')])
occs <- userOccs.csv[!occs.dups,]
# remove NAs
occs <- occs[complete.cases(occs$longitude, occs$latitude), ]
# give all records a unique ID
occs$occID <- row.names(occs)

Process Occurrence Data

The following code recreates the polygon used to select occurrences to keep in the analysis.

selCoords <- data.frame(x = c(-44.2146, -32.0024, -32.4417, -59.1504, -74.2618, -88.7583, -81.1147, -75.6676, -44.2146), y = c(-31.3536, -15.369, 0.2637, 11.3508, 13.8381, -3.6889, -23.2413, -30.1451, -31.3536))
selPoly <- sp::SpatialPolygons(list(sp::Polygons(list(sp::Polygon(selCoords)), ID=1)))
occs.xy <- occs[c('longitude', 'latitude')]
sp::coordinates(occs.xy) <- ~ longitude + latitude
intersect <- sp::over(occs.xy, selPoly)
intersect.rowNums <- as.numeric(which(!(is.na(intersect))))
occs <- occs[intersect.rowNums, ]
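
For a quick sanity check before modeling, the records kept inside the polygon can be inspected on an interactive map (mapview and sf are already loaded); an optional sketch:

# view the retained occurrence records interactively (optional check)
occs.sf <- sf::st_as_sf(occs, coords = c("longitude", "latitude"), crs = 4326)
mapview::mapview(occs.sf)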

Obtain Environmental Data

This analysis uses the WorldClim (http://www.worldclim.org/) bioclimatic dataset at a resolution of 5 arcmin.

# get WorldClim bioclimatic variable rasters
# (lat and lon are only required for the 0.5 arcmin tiles, so they are omitted here)
envs <- raster::getData(name = "worldclim", var = "bio", res = 5)
# rename the raster variables
envRes <- 5
if (envRes == 0.5) {
  i <- grep('_', names(envs))
  editNames <- sapply(strsplit(names(envs)[i], '_'), function(x) x[1])
  names(envs)[i] <- editNames
}
i <- grep('bio[0-9]$', names(envs))
editNames <- paste('bio', sapply(strsplit(names(envs)[i], 'bio'), function(x) x[2]), sep='0')
names(envs)[i] <- editNames
# subset by those variables selected
envs <- envs[[c('bio01', 'bio02', 'bio03', 'bio04', 'bio05', 'bio06', 'bio07', 'bio08', 'bio09', 'bio10', 'bio11', 'bio12', 'bio13', 'bio14', 'bio15', 'bio16', 'bio17', 'bio18', 'bio19')]]
# extract environmental values at occ grid cells
locs.vals <- raster::extract(envs[[1]], occs[, c('longitude', 'latitude')])
# remove occs without environmental values
occs <- occs[!is.na(locs.vals), ]  

Process Environmental Data

The background selection technique chosen was Bounding Box.

xmin <- min(occs$longitude)
xmax <- max(occs$longitude)
ymin <- min(occs$latitude)
ymax <- max(occs$latitude)
bb <- matrix(c(xmin, xmin, xmax, xmax, xmin, ymin, ymax, ymax, ymin, ymin), ncol=2)
bgExt <- sp::SpatialPolygons(list(sp::Polygons(list(sp::Polygon(bb)), 1)))

The buffer size of the study extent polygon was defined as 18 degrees.

bgExt <- rgeos::gBuffer(bgExt, width = 18)

Mask the environmental variables by the bounding box, then take a random sample of background values from the study extent. Because the sample is random, your results may differ from those in the original session. If the background samples seem too variable, increase the sample size from 10,000 (e.g., to 50,000 or 100,000); a larger background sample reduces variability between runs.

# crop the environmental rasters by the background extent shape
envsBgCrop <- raster::crop(envs, bgExt)
# mask the background extent shape from the cropped raster
envsBgMsk <- raster::mask(envsBgCrop, bgExt)
# sample random background points
bg.xy <- dismo::randomPoints(envsBgMsk, 10000)
# convert matrix output to data frame
bg.xy <- as.data.frame(bg.xy)  
colnames(bg.xy) <- c("longitude", "latitude")
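
Because dismo::randomPoints() draws a random sample, fixing a seed before the draw makes the background points, and therefore the results, repeatable; a minimal sketch (the seed value is arbitrary):

# set a fixed seed so the random background draw is reproducible
set.seed(48)
bg.xy <- as.data.frame(dismo::randomPoints(envsBgMsk, 10000))
colnames(bg.xy) <- c("longitude", "latitude")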

Partition Occurrence Data

Occurrence data is now partitioned for cross-validation, a method that iteratively builds a model on all but one group and evaluates that model on the left-out group.

For example, if the data is partitioned into 3 groups A, B, and C, a model is first built with groups A and B and is evaluated on C. This is repeated by building a model with B and C and evaluating on A, and so on until all combinations are done.

Cross-validation operates under the assumption that the groups are independent of each other, which may or may not be a safe assumption for your dataset. Spatial partitioning is one way to ensure more independence between groups.
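
For intuition, random k-fold partitioning simply assigns each record a fold number at random; a minimal standalone sketch (illustrative only, not the ENMeval call used below):

# assign 20 hypothetical records to k = 3 random folds
k <- 3
n <- 20
folds <- sample(rep(1:k, length.out = n))
table(folds)  # each fold is left out once during cross-validation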

You selected to partition your occurrence data by the random k-fold method (k = 2).

occs.xy <- occs[c('longitude', 'latitude')]
group.data <- ENMeval::get.randomkfold(occ = occs.xy, bg = bg.xy, kfolds = 2)
# pull out the occurrence and background partition group numbers from the list
occs.grp <- group.data[[1]]
bg.grp <- group.data[[2]]

Build and Evaluate Niche Model

You selected the Maxent model (fit with the maxnet algorithm).

# define the vector of regularization multipliers to test
rms <- seq(1, 2, 1)
# iterate model building over all chosen parameter settings
e <- ENMeval::ENMevaluate(occ = occs.xy, env = envsBgMsk, bg.coords = bg.xy,
                          RMvalues = rms, fc = 'L', method = 'user', 
                          occ.grp = occs.grp, bg.grp = bg.grp, 
                          clamp = TRUE, algorithm = "maxnet")
# unpack the results data frame, the list of models, and the RasterStack of raw predictions
evalTbl <- e@results
print(evalTbl)
##   rm fc tune.args auc.train cbi.train auc.diff.avg auc.diff.sd auc.val.avg
## 1  1  L rm.1_fc.L  0.676344     0.702    0.1194944   0.1276380   0.5833239
## 2  2  L rm.2_fc.L  0.656218     0.860    0.1149306   0.1246831   0.5304630
##    auc.val.sd cbi.val.avg cbi.val.sd or.10p.avg  or.10p.sd or.mtp.avg or.mtp.sd
## 1 0.065968757      0.2155  0.2778930  0.2756410 0.15411302  0.2339744 0.2130386
## 2 0.009016291     -0.4935  0.1817264  0.3589744 0.03626189  0.2339744 0.2130386
##      AICc delta.AICc      w.AIC ncoef
## 1 623.427   6.806941 0.03218717     6
## 2 616.620   0.000000 0.96781283     3
evalMods <- e@models
names(evalMods) <- e@tune.settings$tune.args
evalPreds <- e@predictions
# view response curves for environmental variables with non-zero coefficients
plot(evalMods[["rm.2_fc.L"]], vars = c('bio05', 'bio07', 'bio19'), type = "cloglog")

# view response curves for environmental variables with non-zero coefficients
# plot(evalMods[["rm.2_fc.L"]], vars = c('bio01', 'bio03', 'bio04', 'bio06', 'bio09', 'bio12', 'bio13', 'bio14', 'bio15', 'bio17', 'bio18', 'bio19'), type = "cloglog")
# view ENMeval results
ENMeval::evalplot.stats(e, stats = "delta.AICc", color = "rm", x.var = "fc")

# Select your model from the models list
mod <- evalMods[["rm.2_fc.L"]]
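
Here rm.2_fc.L is selected by hand because it has delta.AICc = 0 in the table above; the same choice can be made programmatically (a sketch, assuming AICc is your selection criterion):

# pick the settings with the lowest AICc from the evaluation table
bestRow <- which.min(evalTbl$delta.AICc)
mod <- evalMods[[as.character(evalTbl$tune.args[bestRow])]]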

Generate predictions

# Select your model from the models list
#mod <- evalMods[["rm.1_fc.L"]]
# generate raw prediction
pred <- evalPreds[["rm.2_fc.L"]]
# plot the model prediction
plot(pred,  main="P. nasutus")

# generate logistic prediction
pred_logistic <- predictMaxnet(mod, envsBgMsk, type = 'logistic', clamp = TRUE)
# get predicted values for occurrence grid cells
occPredVals <- raster::extract(pred_logistic, occs.xy)
# define 10-percentile training presence threshold
thr <- thresh(occPredVals, "p10")
# threshold model prediction
pred_logistic <- pred_logistic > thr
# plot the model prediction
plot(pred_logistic, main="P. nasutus")

# generate cloglog prediction
pred <- predictMaxnet(mod, envsBgMsk, type = 'cloglog', clamp = TRUE)
# get predicted values for occurrence grid cells
occPredVals <- raster::extract(pred, occs.xy)
# define 10-percentile training presence threshold
thr <- thresh(occPredVals, "p10")
# threshold model prediction (keep the continuous prediction in `pred`,
# since the projection step below re-extracts occurrence values from it)
pred_thr <- pred > thr
# plot the thresholded model prediction
plot(pred_thr)

Project Niche Model

You selected to project your model. First define a polygon with the coordinates you chose, then crop and mask your predictor rasters. Finally, predict suitability values for these new raster cells based on the model you selected.

projCoords <- data.frame(x = c(-77.8636, -81.7293, -82.0808, -77.5122, -78.9179, -47.6407, -31.4749, -27.7849, -32.3535, -58.7107, -76.2822, -77.8636), y = c(12.0393, 2.9869, -18.979, -39.2323, -47.2792, -46.195, -32.1012, -8.0592, 10.3149, 14.9448, 14.9448, 12.0393))
projPoly <- sp::SpatialPolygons(list(sp::Polygons(list(sp::Polygon(projCoords)), ID=1)))

Project Niche Model to New Time

Now download the future climate variables chosen with Wallace, crop and mask them by projPoly, and use the predictMaxnet() function to predict suitability values for the new time based on the selected model.

envsFuture <- raster::getData("CMIP5", var = "bio", res = 5, rcp = 45, model = "AC", year = 50)

predsProj <- raster::crop(envsFuture, projPoly)
predsProj <- raster::mask(predsProj, projPoly)

# rename the future climate variables to match the current ones
# (assumes getData returns the 19 bio layers in order 1-19)
names(predsProj) <- paste0('bio', sprintf("%02d", 1:19))
# select climate variables
predsProj <- raster::subset(predsProj, names(envs))
# predict model
proj <- predictMaxnet(mod, predsProj, type = 'cloglog', clamp = TRUE)
# get predicted values for occurrence grid cells
# (from the continuous current-time prediction kept in `pred`)
occPredVals <- raster::extract(pred, occs.xy)
# define 10-percentile training presence threshold
thr <- thresh(occPredVals, "p10")
# threshold model prediction
proj <- proj > thr
# plot the model prediction
plot(proj)

Niche model for *Promops centralis* using Wallace

User CSV path

# NOTE: provide the path to the folder that contains the CSV file
d.occs <- 'D:/BoxFiles/Box Sync/CodigoR/Unillanos-Bat2019/data/'
# create path to user occurrences csv file
userOccs.path <- file.path(d.occs, "Puntos_Promops_centralis.csv")
# read in csv
userOccs.csv <- read.csv(userOccs.path, header = TRUE)
# remove rows with duplicate coordinates
occs.dups <- duplicated(userOccs.csv[c('longitude', 'latitude')])
occs <- userOccs.csv[!occs.dups,]
# remove NAs
occs <- occs[complete.cases(occs$longitude, occs$latitude), ]
# give all records a unique ID
occs$occID <- row.names(occs)

Process Occurrence Data

The following code recreates the polygon used to select occurrences to keep in the analysis.

selCoords <- data.frame(x = c(-44.2146, -32.0024, -32.4417, -59.1504, -74.2618, -88.7583, -81.1147, -75.6676, -44.2146), y = c(-31.3536, -15.369, 0.2637, 11.3508, 13.8381, -3.6889, -23.2413, -30.1451, -31.3536))
selPoly <- sp::SpatialPolygons(list(sp::Polygons(list(sp::Polygon(selCoords)), ID=1)))
occs.xy <- occs[c('longitude', 'latitude')]
sp::coordinates(occs.xy) <- ~ longitude + latitude
intersect <- sp::over(occs.xy, selPoly)
intersect.rowNums <- as.numeric(which(!(is.na(intersect))))
occs <- occs[intersect.rowNums, ]

Obtain Environmental Data

This analysis uses the WorldClim (http://www.worldclim.org/) bioclimatic dataset at a resolution of 5 arcmin.

# get WorldClim bioclimatic variable rasters
# (lat and lon are only required for the 0.5 arcmin tiles, so they are omitted here)
envs <- raster::getData(name = "worldclim", var = "bio", res = 5)
# rename the raster variables
envRes <- 5
if (envRes == 0.5) {
  i <- grep('_', names(envs))
  editNames <- sapply(strsplit(names(envs)[i], '_'), function(x) x[1])
  names(envs)[i] <- editNames
}
i <- grep('bio[0-9]$', names(envs))
editNames <- paste('bio', sapply(strsplit(names(envs)[i], 'bio'), function(x) x[2]), sep='0')
names(envs)[i] <- editNames
# subset by those variables selected
envs <- envs[[c('bio01', 'bio02', 'bio03', 'bio04', 'bio05', 'bio06', 'bio07', 'bio08', 'bio09', 'bio10', 'bio11', 'bio12', 'bio13', 'bio14', 'bio15', 'bio16', 'bio17', 'bio18', 'bio19')]]
# extract environmental values at occ grid cells
locs.vals <- raster::extract(envs[[1]], occs[, c('longitude', 'latitude')])
# remove occs without environmental values
occs <- occs[!is.na(locs.vals), ]  

Process Environmental Data

The background selection technique chosen was Bounding Box.

xmin <- min(occs$longitude)
xmax <- max(occs$longitude)
ymin <- min(occs$latitude)
ymax <- max(occs$latitude)
bb <- matrix(c(xmin, xmin, xmax, xmax, xmin, ymin, ymax, ymax, ymin, ymin), ncol=2)
bgExt <- sp::SpatialPolygons(list(sp::Polygons(list(sp::Polygon(bb)), 1)))

The buffer size of the study extent polygon was defined as 18 degrees.

bgExt <- rgeos::gBuffer(bgExt, width = 18)

Mask the environmental variables by the bounding box, then take a random sample of background values from the study extent. Because the sample is random, your results may differ from those in the original session. If the background samples seem too variable, increase the sample size from 10,000 (e.g., to 50,000 or 100,000); a larger background sample reduces variability between runs.

# crop the environmental rasters by the background extent shape
envsBgCrop <- raster::crop(envs, bgExt)
# mask the background extent shape from the cropped raster
envsBgMsk <- raster::mask(envsBgCrop, bgExt)
# sample random background points
bg.xy <- dismo::randomPoints(envsBgMsk, 10000)
# convert matrix output to data frame
bg.xy <- as.data.frame(bg.xy)  
colnames(bg.xy) <- c("longitude", "latitude")

Partition Occurrence Data

Occurrence data is now partitioned for cross-validation, a method that iteratively builds a model on all but one group and evaluates that model on the left-out group.

For example, if the data is partitioned into 3 groups A, B, and C, a model is first built with groups A and B and is evaluated on C. This is repeated by building a model with B and C and evaluating on A, and so on until all combinations are done.

Cross-validation operates under the assumption that the groups are independent of each other, which may or may not be a safe assumption for your dataset. Spatial partitioning is one way to ensure more independence between groups.

You selected to partition your occurrence data by the random k-fold method (k = 2).

occs.xy <- occs[c('longitude', 'latitude')]
group.data <- ENMeval::get.randomkfold(occ = occs.xy, bg = bg.xy, kfolds = 2)
# pull out the occurrence and background partition group numbers from the list
occs.grp <- group.data[[1]]
bg.grp <- group.data[[2]]

Build and Evaluate Niche Model

You selected the Maxent model (fit with the maxnet algorithm).

# define the vector of regularization multipliers to test
rms <- seq(1, 2, 1)
# iterate model building over all chosen parameter settings
e <- ENMeval::ENMevaluate(occ = occs.xy, env = envsBgMsk, bg.coords = bg.xy,
                          RMvalues = rms, fc = 'L', method = 'user', 
                          occ.grp = occs.grp, bg.grp = bg.grp, 
                          clamp = TRUE, algorithm = "maxnet")
# unpack the results data frame, the list of models, and the RasterStack of raw predictions
evalTbl <- e@results
print(evalTbl)
##   rm fc tune.args auc.train cbi.train auc.diff.avg auc.diff.sd auc.val.avg
## 1  1  L rm.1_fc.L 0.8700410     0.923   0.10298452  0.05856023   0.6468286
## 2  2  L rm.2_fc.L 0.8017692     0.632   0.07014762  0.08706841   0.6592429
##    auc.val.sd cbi.val.avg cbi.val.sd or.10p.avg  or.10p.sd or.mtp.avg
## 1 0.002033774       0.364  0.3450681  0.1785714 0.01683588 0.10317460
## 2 0.009980307       0.717  0.1385929  0.1785714 0.01683588 0.04761905
##    or.mtp.sd     AICc delta.AICc        w.AIC ncoef
## 1 0.01122392 917.7179    0.00000 0.9995801496    12
## 2 0.06734350 933.2683   15.55038 0.0004198504    10
evalMods <- e@models
names(evalMods) <- e@tune.settings$tune.args
evalPreds <- e@predictions
# view response curves for environmental variables with non-zero coefficients
plot(evalMods[["rm.2_fc.L"]], vars = c('bio05', 'bio07', 'bio19'), type = "cloglog")

# view response curves for environmental variables with non-zero coefficients
# plot(evalMods[["rm.2_fc.L"]], vars = c('bio01', 'bio03', 'bio04', 'bio06', 'bio09', 'bio12', 'bio13', 'bio14', 'bio15', 'bio17', 'bio18', 'bio19'), type = "cloglog")
# view ENMeval results
ENMeval::evalplot.stats(e, stats = "delta.AICc", color = "rm", x.var = "fc")

# Select your model from the models list
mod <- evalMods[["rm.2_fc.L"]]

Generate predictions

# Select your model from the models list
#mod <- evalMods[["rm.1_fc.L"]]
# generate raw prediction
pred <- evalPreds[["rm.2_fc.L"]]
# plot the model prediction
plot(pred,  main="P. centralis")

# generate logistic prediction
pred_logistic <- predictMaxnet(mod, envsBgMsk, type = 'logistic', clamp = TRUE)
# get predicted values for occurrence grid cells
occPredVals <- raster::extract(pred_logistic, occs.xy)
# define 10-percentile training presence threshold
thr <- thresh(occPredVals, "p10")
# threshold model prediction
pred_logistic <- pred_logistic > thr
# plot the model prediction
plot(pred_logistic, main="P. centralis")

# generate cloglog prediction
pred <- predictMaxnet(mod, envsBgMsk, type = 'cloglog', clamp = TRUE)
# get predicted values for occurrence grid cells
occPredVals <- raster::extract(pred, occs.xy)
# define 10-percentile training presence threshold
thr <- thresh(occPredVals, "p10")
# threshold model prediction (keep the continuous prediction in `pred`,
# since the projection step below re-extracts occurrence values from it)
pred_thr <- pred > thr
# plot the thresholded model prediction
plot(pred_thr)

Project Niche Model

You selected to project your model. First define a polygon with the coordinates you chose, then crop and mask your predictor rasters. Finally, predict suitability values for these new raster cells based on the model you selected.

projCoords <- data.frame(x = c(-77.8636, -81.7293, -82.0808, -77.5122, -78.9179, -47.6407, -31.4749, -27.7849, -32.3535, -58.7107, -76.2822, -77.8636), y = c(12.0393, 2.9869, -18.979, -39.2323, -47.2792, -46.195, -32.1012, -8.0592, 10.3149, 14.9448, 14.9448, 12.0393))
projPoly <- sp::SpatialPolygons(list(sp::Polygons(list(sp::Polygon(projCoords)), ID=1)))

Project Niche Model to New Time

Now download the future climate variables chosen with Wallace, crop and mask them by projPoly, and use the predictMaxnet() function to predict suitability values for the new time based on the selected model.

envsFuture <- raster::getData("CMIP5", var = "bio", res = 5, rcp = 45, model = "AC", year = 50)

predsProj <- raster::crop(envsFuture, projPoly)
predsProj <- raster::mask(predsProj, projPoly)

# rename the future climate variables to match the current ones
# (assumes getData returns the 19 bio layers in order 1-19)
names(predsProj) <- paste0('bio', sprintf("%02d", 1:19))
# select climate variables
predsProj <- raster::subset(predsProj, names(envs))
# predict model
proj <- predictMaxnet(mod, predsProj, type = 'cloglog', clamp = TRUE)
# get predicted values for occurrence grid cells
# (from the continuous current-time prediction kept in `pred`)
occPredVals <- raster::extract(pred, occs.xy)
# define 10-percentile training presence threshold
thr <- thresh(occPredVals, "p10")
# threshold model prediction
proj <- proj > thr
# plot the model prediction
plot(proj)