Wallace uses the following R packages, which must be installed and loaded before starting.
library(spocc)
library(spThin)
library(dismo)
library(maxnet)
library(rgeos)
library(ENMeval)
library(dplyr)
library(mapview)
library(sf)
library(readxl)
library(readr)
library(knitr)
library(rmarkdown)
# Load and see the data
User CSV path for the Promops nasutus occurrence records:
# NOTE: provide the path to the folder that contains the CSV file
d.occs <- 'D:/BoxFiles/Box Sync/CodigoR/Unillanos-Bat2019/data/'
# create path to user occurrences csv file
userOccs.path <- file.path(d.occs, "Puntos_Promops_nasutus.csv")
# read in csv
userOccs.csv <- read.csv(userOccs.path, header = TRUE)
# remove rows with duplicate coordinates
occs.dups <- duplicated(userOccs.csv[c('longitude', 'latitude')])
occs <- userOccs.csv[!occs.dups, ]
# remove NAs
occs <- occs[complete.cases(occs$longitude, occs$latitude), ]
# give all records a unique ID
occs$occID <- row.names(occs)
The following code recreates the polygon used to select occurrences to keep in the analysis.
selCoords <- data.frame(x = c(-44.2146, -32.0024, -32.4417, -59.1504, -74.2618, -88.7583, -81.1147, -75.6676, -44.2146), y = c(-31.3536, -15.369, 0.2637, 11.3508, 13.8381, -3.6889, -23.2413, -30.1451, -31.3536))
selPoly <- sp::SpatialPolygons(list(sp::Polygons(list(sp::Polygon(selCoords)), ID = 1)))
occs.xy <- occs[c('longitude', 'latitude')]
sp::coordinates(occs.xy) <- ~ longitude + latitude
intersect <- sp::over(occs.xy, selPoly)
intersect.rowNums <- as.numeric(which(!(is.na(intersect))))
occs <- occs[intersect.rowNums, ]
We use the WorldClim (http://www.worldclim.org/) bioclimatic dataset at a resolution of 5 arcmin.
# get WorldClim bioclimatic variable rasters
# (lat and lon are only required for the 0.5 arcmin tiles, so they are omitted here)
envs <- raster::getData(name = "worldclim", var = "bio", res = 5)
# change raster variable names
envRes <- 5
if (envRes == 0.5) {
  i <- grep('_', names(envs))
  editNames <- sapply(strsplit(names(envs)[i], '_'), function(x) x[1])
  names(envs)[i] <- editNames
}
i <- grep('bio[0-9]$', names(envs))
editNames <- paste('bio', sapply(strsplit(names(envs)[i], 'bio'), function(x) x[2]), sep = '0')
names(envs)[i] <- editNames
# subset by those variables selected
envs <- envs[[c('bio01', 'bio02', 'bio03', 'bio04', 'bio05', 'bio06', 'bio07', 'bio08', 'bio09', 'bio10', 'bio11', 'bio12', 'bio13', 'bio14', 'bio15', 'bio16', 'bio17', 'bio18', 'bio19')]]
# extract environmental values at occ grid cells
locs.vals <- raster::extract(envs[[1]], occs[, c('longitude', 'latitude')])
# remove occs without environmental values
occs <- occs[!is.na(locs.vals), ]
The background selection technique chosen is a bounding box.
xmin <- min(occs$longitude)
xmax <- max(occs$longitude)
ymin <- min(occs$latitude)
ymax <- max(occs$latitude)
bb <- matrix(c(xmin, xmin, xmax, xmax, xmin, ymin, ymax, ymax, ymin, ymin), ncol = 2)
bgExt <- sp::SpatialPolygons(list(sp::Polygons(list(sp::Polygon(bb)), 1)))
The buffer size of the study extent polygon is defined as 18 degrees.
bgExt <- rgeos::gBuffer(bgExt, width = 18)
Mask the environmental variables by the bounding box, and take a random sample of background values from the study extent. As the sample is random, your results may differ from those in the session. If there seems to be too much variability in these background samples, try increasing the number from 10,000 to something higher (e.g. 50,000 or 100,000). The better your background sample, the less variability you'll have between runs.
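If you want the background sample, and therefore the downstream results, to be reproducible across runs, you can fix the random seed before sampling; the seed value below is an arbitrary choice.
# optional: fix the random seed so dismo::randomPoints() below draws
# the same background sample on every run (48 is an arbitrary value)
set.seed(48)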
# crop the environmental rasters by the background extent shape
envsBgCrop <- raster::crop(envs, bgExt)
# mask the cropped rasters to the background extent shape
envsBgMsk <- raster::mask(envsBgCrop, bgExt)
# sample random background points
bg.xy <- dismo::randomPoints(envsBgMsk, 10000)
# convert matrix output to data frame
bg.xy <- as.data.frame(bg.xy)
colnames(bg.xy) <- c("longitude", "latitude")
Occurrence data is now partitioned for cross-validation, a method that iteratively builds a model on all but one group and evaluates that model on the left-out group.
For example, if the data is partitioned into 3 groups A, B, and C, a model is first built with groups A and B and is evaluated on C. This is repeated by building a model with B and C and evaluating on A, and so on until all combinations are done.
Cross-validation operates under the assumption that the groups are independent of each other, which may or may not be a safe assumption for your dataset. Spatial partitioning is one way to ensure more independence between groups.
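As a toy illustration of the partitioning logic (hypothetical code, separate from the Wallace workflow that follows), a manual k-fold loop looks like this:
# hypothetical sketch of k-fold cross-validation, not part of the workflow:
# randomly assign each record to one of k groups, then hold each group out in turn
k <- 3
grp <- sample(rep(1:k, length.out = nrow(occs)))
for (g in 1:k) {
  train <- occs[grp != g, ]  # build the model on the other k-1 groups
  test  <- occs[grp == g, ]  # evaluate it on the held-out group
  # ... fit and evaluate the model here ...
}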
You selected to partition your occurrence data by the random k-fold method.
occs.xy <- occs[c('longitude', 'latitude')]
group.data <- ENMeval::get.randomkfold(occ = occs.xy, bg = bg.xy, kfolds = 2)
# pull out the occurrence and background partition group numbers from the list
occs.grp <- group.data[[1]]
bg.grp <- group.data[[2]]
You selected the maxent model.
# define the vector of regularization multipliers to test
rms <- seq(1, 2, 1)
# iterate model building over all chosen parameter settings
e <- ENMeval::ENMevaluate(occ = occs.xy, env = envsBgMsk, bg.coords = bg.xy,
                          RMvalues = rms, fc = 'L', method = 'user',
                          occ.grp = occs.grp, bg.grp = bg.grp,
                          clamp = TRUE, algorithm = "maxnet")
# unpack the results data frame, the list of models, and the RasterStack of raw predictions
evalTbl <- e@results
print(evalTbl)
## rm fc tune.args auc.train cbi.train auc.diff.avg auc.diff.sd auc.val.avg
## 1 1 L rm.1_fc.L 0.676344 0.702 0.1194944 0.1276380 0.5833239
## 2 2 L rm.2_fc.L 0.656218 0.860 0.1149306 0.1246831 0.5304630
## auc.val.sd cbi.val.avg cbi.val.sd or.10p.avg or.10p.sd or.mtp.avg or.mtp.sd
## 1 0.065968757 0.2155 0.2778930 0.2756410 0.15411302 0.2339744 0.2130386
## 2 0.009016291 -0.4935 0.1817264 0.3589744 0.03626189 0.2339744 0.2130386
## AICc delta.AICc w.AIC ncoef
## 1 623.427 6.806941 0.03218717 6
## 2 616.620 0.000000 0.96781283 3
evalMods <- e@models
names(evalMods) <- e@tune.settings$tune.args
evalPreds <- e@predictions
# view response curves for environmental variables with non-zero coefficients
plot(evalMods[["rm.2_fc.L"]], vars = c('bio05', 'bio07', 'bio19'), type = "cloglog")
# view response curves for environmental variables with non-zero coefficients
# plot(evalMods[["rm.2_fc.L"]], vars = c('bio01', 'bio03', 'bio04', 'bio06', 'bio09', 'bio12', 'bio13', 'bio14', 'bio15', 'bio17', 'bio18', 'bio19'), type = "cloglog")
# view ENMeval results
ENMeval::evalplot.stats(e, stats = "delta.AICc", "rm", "fc")
# select your model from the models list
mod <- evalMods[["rm.2_fc.L"]]
# alternatively, select the other candidate model
#mod <- evalMods[["rm.1_fc.L"]]
# generate raw prediction
pred <- evalPreds[["rm.2_fc.L"]]
# plot the model prediction
plot(pred, main = "P. nasutus")
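The thresholding code below calls two helpers, predictMaxnet() and thresh(), that are not defined in this script and are not provided by the packages loaded above; Wallace-generated sessions normally define them. If yours does not, the following minimal sketches, written only to be consistent with how the helpers are used here, should work (the exact Wallace implementations may differ).
# sketch: rasterize maxnet predictions over a RasterStack whose layer
# names match the variables the model was fitted with
predictMaxnet <- function(mod, envs, type = "cloglog", clamp = TRUE) {
  envs.df <- as.data.frame(raster::getValues(envs))
  ok <- stats::complete.cases(envs.df)
  vals <- rep(NA_real_, nrow(envs.df))
  # maxnet's predict method accepts type and clamp arguments
  vals[ok] <- predict(mod, envs.df[ok, , drop = FALSE], type = type, clamp = clamp)
  out <- envs[[1]]
  raster::values(out) <- vals
  names(out) <- type
  out
}
# sketch: "mtp" = minimum training presence; "p10" = the value above which
# 90% of the training presences fall (10% training omission)
thresh <- function(modOccVals, type) {
  if (type == "mtp") return(min(modOccVals, na.rm = TRUE))
  if (type == "p10") {
    n <- length(modOccVals)
    n90 <- if (n < 10) floor(n * 0.9) else ceiling(n * 0.9)
    return(rev(sort(modOccVals))[n90])
  }
  stop("unknown threshold type: ", type)
}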
# generate logistic prediction
pred_logistic <- predictMaxnet(mod, envsBgMsk, type = 'logistic', clamp = TRUE)
# get predicted values for occurrence grid cells
occPredVals <- raster::extract(pred_logistic, occs.xy)
# define the 10% training presence threshold
thr <- thresh(occPredVals, "p10")
# threshold model prediction
pred_logistic <- pred_logistic > thr
# plot the thresholded prediction
plot(pred_logistic, main = "P. nasutus")
# generate cloglog prediction
pred <- predictMaxnet(mod, envsBgMsk, type = 'cloglog', clamp = TRUE)
# get predicted values for occurrence grid cells
occPredVals <- raster::extract(pred, occs.xy)
# define the 10% training presence threshold
thr <- thresh(occPredVals, "p10")
# threshold model prediction
pred <- pred > thr
# plot the thresholded prediction
plot(pred)
You selected to project your model. First define a polygon with the coordinates you chose, then crop and mask your predictor rasters. Finally, predict suitability values for these new raster cells based on the model you selected.
projCoords <- data.frame(x = c(-77.8636, -81.7293, -82.0808, -77.5122, -78.9179, -47.6407, -31.4749, -27.7849, -32.3535, -58.7107, -76.2822, -77.8636), y = c(12.0393, 2.9869, -18.979, -39.2323, -47.2792, -46.195, -32.1012, -8.0592, 10.3149, 14.9448, 14.9448, 12.0393))
projPoly <- sp::SpatialPolygons(list(sp::Polygons(list(sp::Polygon(projCoords)), ID = 1)))
Now download the future climate variables chosen with Wallace, crop and mask them by projPoly, and use the predictMaxnet() function to predict values for the new time period based on the selected model.
<- raster::getData("CMIP5", var = "bio", res = 5, rcp = 45, model = "AC", year = 50)
envsFuture
<- raster::crop(envsFuture, projPoly)
predsProj <- raster::mask(predsProj, projPoly)
predsProj
# rename future climate variable names
names(predsProj) <- paste0('bio', sprintf("%02d", 1:19))
# select climate variables
<- raster::subset(predsProj, names(envs)) predsProj
# predict model
proj <- predictMaxnet(mod, predsProj, type = 'cloglog', clamp = TRUE)
# threshold the projection with the p10 threshold (thr) computed above for the
# current-time cloglog prediction; pred has already been thresholded, so
# re-extracting occurrence values from it would only return 0s and 1s
proj <- proj > thr
# plot the model prediction
plot(proj)
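If you want to keep the thresholded projection for use outside R, you can write it to disk; the file name below is illustrative.
# optional: save the thresholded projection as a GeoTIFF (illustrative file name)
raster::writeRaster(proj, filename = "Promops_nasutus_proj_2050.tif",
                    format = "GTiff", overwrite = TRUE)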
The same workflow is now repeated for Promops centralis.
User CSV path:
# NOTE: provide the path to the folder that contains the CSV file
d.occs <- 'D:/BoxFiles/Box Sync/CodigoR/Unillanos-Bat2019/data/'
# create path to user occurrences csv file
userOccs.path <- file.path(d.occs, "Puntos_Promops_centralis.csv")
# read in csv
userOccs.csv <- read.csv(userOccs.path, header = TRUE)
# remove rows with duplicate coordinates
occs.dups <- duplicated(userOccs.csv[c('longitude', 'latitude')])
occs <- userOccs.csv[!occs.dups, ]
# remove NAs
occs <- occs[complete.cases(occs$longitude, occs$latitude), ]
# give all records a unique ID
occs$occID <- row.names(occs)
The following code recreates the polygon used to select occurrences to keep in the analysis.
selCoords <- data.frame(x = c(-44.2146, -32.0024, -32.4417, -59.1504, -74.2618, -88.7583, -81.1147, -75.6676, -44.2146), y = c(-31.3536, -15.369, 0.2637, 11.3508, 13.8381, -3.6889, -23.2413, -30.1451, -31.3536))
selPoly <- sp::SpatialPolygons(list(sp::Polygons(list(sp::Polygon(selCoords)), ID = 1)))
occs.xy <- occs[c('longitude', 'latitude')]
sp::coordinates(occs.xy) <- ~ longitude + latitude
intersect <- sp::over(occs.xy, selPoly)
intersect.rowNums <- as.numeric(which(!(is.na(intersect))))
occs <- occs[intersect.rowNums, ]
We again use the WorldClim (http://www.worldclim.org/) bioclimatic dataset at a resolution of 5 arcmin.
# get WorldClim bioclimatic variable rasters
# (lat and lon are only required for the 0.5 arcmin tiles, so they are omitted here)
envs <- raster::getData(name = "worldclim", var = "bio", res = 5)
# change raster variable names
envRes <- 5
if (envRes == 0.5) {
  i <- grep('_', names(envs))
  editNames <- sapply(strsplit(names(envs)[i], '_'), function(x) x[1])
  names(envs)[i] <- editNames
}
i <- grep('bio[0-9]$', names(envs))
editNames <- paste('bio', sapply(strsplit(names(envs)[i], 'bio'), function(x) x[2]), sep = '0')
names(envs)[i] <- editNames
# subset by those variables selected
envs <- envs[[c('bio01', 'bio02', 'bio03', 'bio04', 'bio05', 'bio06', 'bio07', 'bio08', 'bio09', 'bio10', 'bio11', 'bio12', 'bio13', 'bio14', 'bio15', 'bio16', 'bio17', 'bio18', 'bio19')]]
# extract environmental values at occ grid cells
locs.vals <- raster::extract(envs[[1]], occs[, c('longitude', 'latitude')])
# remove occs without environmental values
occs <- occs[!is.na(locs.vals), ]
The background selection technique chosen is a bounding box.
xmin <- min(occs$longitude)
xmax <- max(occs$longitude)
ymin <- min(occs$latitude)
ymax <- max(occs$latitude)
bb <- matrix(c(xmin, xmin, xmax, xmax, xmin, ymin, ymax, ymax, ymin, ymin), ncol = 2)
bgExt <- sp::SpatialPolygons(list(sp::Polygons(list(sp::Polygon(bb)), 1)))
The buffer size of the study extent polygon is defined as 18 degrees.
bgExt <- rgeos::gBuffer(bgExt, width = 18)
Mask the environmental variables by the bounding box, and take a random sample of background values from the study extent. As the sample is random, your results may differ from those in the session. If there seems to be too much variability in these background samples, try increasing the number from 10,000 to something higher (e.g. 50,000 or 100,000). The better your background sample, the less variability you'll have between runs.
# crop the environmental rasters by the background extent shape
envsBgCrop <- raster::crop(envs, bgExt)
# mask the cropped rasters to the background extent shape
envsBgMsk <- raster::mask(envsBgCrop, bgExt)
# sample random background points
bg.xy <- dismo::randomPoints(envsBgMsk, 10000)
# convert matrix output to data frame
bg.xy <- as.data.frame(bg.xy)
colnames(bg.xy) <- c("longitude", "latitude")
Occurrence data is now partitioned for cross-validation, a method that iteratively builds a model on all but one group and evaluates that model on the left-out group.
For example, if the data is partitioned into 3 groups A, B, and C, a model is first built with groups A and B and is evaluated on C. This is repeated by building a model with B and C and evaluating on A, and so on until all combinations are done.
Cross-validation operates under the assumption that the groups are independent of each other, which may or may not be a safe assumption for your dataset. Spatial partitioning is one way to ensure more independence between groups.
You selected to partition your occurrence data by the random k-fold method.
occs.xy <- occs[c('longitude', 'latitude')]
group.data <- ENMeval::get.randomkfold(occ = occs.xy, bg = bg.xy, kfolds = 2)
# pull out the occurrence and background partition group numbers from the list
occs.grp <- group.data[[1]]
bg.grp <- group.data[[2]]
You selected the maxent model.
# define the vector of regularization multipliers to test
rms <- seq(1, 2, 1)
# iterate model building over all chosen parameter settings
e <- ENMeval::ENMevaluate(occ = occs.xy, env = envsBgMsk, bg.coords = bg.xy,
                          RMvalues = rms, fc = 'L', method = 'user',
                          occ.grp = occs.grp, bg.grp = bg.grp,
                          clamp = TRUE, algorithm = "maxnet")
# unpack the results data frame, the list of models, and the RasterStack of raw predictions
evalTbl <- e@results
print(evalTbl)
## rm fc tune.args auc.train cbi.train auc.diff.avg auc.diff.sd auc.val.avg
## 1 1 L rm.1_fc.L 0.8700410 0.923 0.10298452 0.05856023 0.6468286
## 2 2 L rm.2_fc.L 0.8017692 0.632 0.07014762 0.08706841 0.6592429
## auc.val.sd cbi.val.avg cbi.val.sd or.10p.avg or.10p.sd or.mtp.avg
## 1 0.002033774 0.364 0.3450681 0.1785714 0.01683588 0.10317460
## 2 0.009980307 0.717 0.1385929 0.1785714 0.01683588 0.04761905
## or.mtp.sd AICc delta.AICc w.AIC ncoef
## 1 0.01122392 917.7179 0.00000 0.9995801496 12
## 2 0.06734350 933.2683 15.55038 0.0004198504 10
evalMods <- e@models
names(evalMods) <- e@tune.settings$tune.args
evalPreds <- e@predictions
# view response curves for environmental variables with non-zero coefficients
plot(evalMods[["rm.2_fc.L"]], vars = c('bio05', 'bio07', 'bio19'), type = "cloglog")
# view response curves for environmental variables with non-zero coefficients
# plot(evalMods[["rm.2_fc.L"]], vars = c('bio01', 'bio03', 'bio04', 'bio06', 'bio09', 'bio12', 'bio13', 'bio14', 'bio15', 'bio17', 'bio18', 'bio19'), type = "cloglog")
# view ENMeval results
ENMeval::evalplot.stats(e, stats = "delta.AICc", "rm", "fc")
# select your model from the models list
mod <- evalMods[["rm.2_fc.L"]]
# alternatively, select the other candidate model
#mod <- evalMods[["rm.1_fc.L"]]
# generate raw prediction
pred <- evalPreds[["rm.2_fc.L"]]
# plot the model prediction
plot(pred, main = "P. centralis")
# generate logistic prediction
pred_logistic <- predictMaxnet(mod, envsBgMsk, type = 'logistic', clamp = TRUE)
# get predicted values for occurrence grid cells
occPredVals <- raster::extract(pred_logistic, occs.xy)
# define the 10% training presence threshold
thr <- thresh(occPredVals, "p10")
# threshold model prediction
pred_logistic <- pred_logistic > thr
# plot the thresholded prediction
plot(pred_logistic, main = "P. centralis")
# generate cloglog prediction
pred <- predictMaxnet(mod, envsBgMsk, type = 'cloglog', clamp = TRUE)
# get predicted values for occurrence grid cells
occPredVals <- raster::extract(pred, occs.xy)
# define the 10% training presence threshold
thr <- thresh(occPredVals, "p10")
# threshold model prediction
pred <- pred > thr
# plot the thresholded prediction
plot(pred)
You selected to project your model. First define a polygon with the coordinates you chose, then crop and mask your predictor rasters. Finally, predict suitability values for these new raster cells based on the model you selected.
projCoords <- data.frame(x = c(-77.8636, -81.7293, -82.0808, -77.5122, -78.9179, -47.6407, -31.4749, -27.7849, -32.3535, -58.7107, -76.2822, -77.8636), y = c(12.0393, 2.9869, -18.979, -39.2323, -47.2792, -46.195, -32.1012, -8.0592, 10.3149, 14.9448, 14.9448, 12.0393))
projPoly <- sp::SpatialPolygons(list(sp::Polygons(list(sp::Polygon(projCoords)), ID = 1)))
Now download the future climate variables chosen with Wallace, crop and mask them by projPoly, and use the predictMaxnet() function to predict values for the new time period based on the selected model.
<- raster::getData("CMIP5", var = "bio", res = 5, rcp = 45, model = "AC", year = 50)
envsFuture
<- raster::crop(envsFuture, projPoly)
predsProj <- raster::mask(predsProj, projPoly)
predsProj
# rename future climate variable names
names(predsProj) <- paste0('bio', sprintf("%02d", 1:19))
# select climate variables
<- raster::subset(predsProj, names(envs)) predsProj
# predict model
proj <- predictMaxnet(mod, predsProj, type = 'cloglog', clamp = TRUE)
# threshold the projection with the p10 threshold (thr) computed above for the
# current-time cloglog prediction; pred has already been thresholded, so
# re-extracting occurrence values from it would only return 0s and 1s
proj <- proj > thr
# plot the model prediction
plot(proj)