Wallace uses the following R packages that must be installed and loaded before starting.
library(spocc)
library(spThin)
library(dismo)
library(maxnet)
library(rgeos)
library(ENMeval)
library(dplyr)
library(mapview)
library(sf)
library(readxl)
library(readr)
library(knitr)
library(rmarkdown)
library(wallace) # assumed here: wallace provides the predictMaxnet() helper called below

Load and inspect the data.
The occurrence data for Promops nasutus are read from a user-provided CSV file.
# NOTE: provide the path to the folder that contains the CSV file
d.occs <- 'D:/BoxFiles/Box Sync/CodigoR/Unillanos-Bat2019/data/'
# create path to user occurrences csv file
userOccs.path <- file.path(d.occs, "Puntos_Promops_nasutus.csv")
# read in csv
userOccs.csv <- read.csv(userOccs.path, header = TRUE)
# remove rows with duplicate coordinates
occs.dups <- duplicated(userOccs.csv[c('longitude', 'latitude')])
occs <- userOccs.csv[!occs.dups,]
# remove NAs
occs <- occs[complete.cases(occs$longitude, occs$latitude), ]
# give all records a unique ID
occs$occID <- row.names(occs)

The following code recreates the polygon used to select the occurrences to keep in the analysis.
selCoords <- data.frame(x = c(-44.2146, -32.0024, -32.4417, -59.1504, -74.2618, -88.7583, -81.1147, -75.6676, -44.2146), y = c(-31.3536, -15.369, 0.2637, 11.3508, 13.8381, -3.6889, -23.2413, -30.1451, -31.3536))
selPoly <- sp::SpatialPolygons(list(sp::Polygons(list(sp::Polygon(selCoords)), ID=1)))
occs.xy <- occs[c('longitude', 'latitude')]
sp::coordinates(occs.xy) <- ~ longitude + latitude
intersect <- sp::over(occs.xy, selPoly)
intersect.rowNums <- as.numeric(which(!(is.na(intersect))))
occs <- occs[intersect.rowNums, ]

Environmental predictors come from the WorldClim (http://www.worldclim.org/) bioclimatic dataset at a resolution of 5 arcmin.
# get WorldClim bioclimatic variable rasters
# (the lat and lon arguments are only required for the 0.5 arcmin tiles)
envs <- raster::getData(name = "worldclim", var = "bio", res = 5)
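# NOTE (assumption, not original session code): raster::getData() is deprecated
# in recent raster releases; geodata::worldclim_global(var = "bio", res = 5,
# path = tempdir()) is the current equivalent, though it returns a terra
# SpatRaster, so the raster-based code below would need adapting.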
# rename the raster variables
envRes <- 5
if (envRes == 0.5) {
i <- grep('_', names(envs))
editNames <- sapply(strsplit(names(envs)[i], '_'), function(x) x[1])
names(envs)[i] <- editNames
}
i <- grep('bio[0-9]$', names(envs))
editNames <- paste('bio', sapply(strsplit(names(envs)[i], 'bio'), function(x) x[2]), sep='0')
names(envs)[i] <- editNames
# subset by those variables selected
envs <- envs[[c('bio01', 'bio02', 'bio03', 'bio04', 'bio05', 'bio06', 'bio07', 'bio08', 'bio09', 'bio10', 'bio11', 'bio12', 'bio13', 'bio14', 'bio15', 'bio16', 'bio17', 'bio18', 'bio19')]]
# extract environmental values at occ grid cells
locs.vals <- raster::extract(envs[[1]], occs[, c('longitude', 'latitude')])
# remove occs without environmental values
occs <- occs[!is.na(locs.vals), ]

The background selection technique chosen was Bounding Box.
xmin <- min(occs$longitude)
xmax <- max(occs$longitude)
ymin <- min(occs$latitude)
ymax <- max(occs$latitude)
bb <- matrix(c(xmin, xmin, xmax, xmax, xmin, ymin, ymax, ymax, ymin, ymin), ncol=2)
bgExt <- sp::SpatialPolygons(list(sp::Polygons(list(sp::Polygon(bb)), 1)))

The buffer size of the study extent polygon was defined as 18 degrees.
bgExt <- rgeos::gBuffer(bgExt, width = 18)

Mask the environmental variables by the buffered bounding box and take a random sample of background values from the study extent. As the sample is random, your results may differ from those in the session. If there seems to be too much variability in these background samples, try increasing the number from 10,000 to something higher (e.g. 50,000 or 100,000). The better your background sample, the less variability you'll have between runs.
# crop the environmental rasters by the background extent shape
envsBgCrop <- raster::crop(envs, bgExt)
# mask the background extent shape from the cropped raster
envsBgMsk <- raster::mask(envsBgCrop, bgExt)
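# to make the random background sample below reproducible across runs
# (an addition, not part of the original session), fix the RNG seed first;
# the seed value is an arbitrary choice
set.seed(48)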
# sample random background points
bg.xy <- dismo::randomPoints(envsBgMsk, 10000)
# convert matrix output to data frame
bg.xy <- as.data.frame(bg.xy)
colnames(bg.xy) <- c("longitude", "latitude")

The occurrence data are now partitioned for cross-validation, a method that iteratively builds a model on all but one group and evaluates that model on the left-out group.
For example, if the data is partitioned into 3 groups A, B, and C, a model is first built with groups A and B and is evaluated on C. This is repeated by building a model with B and C and evaluating on A, and so on until all combinations are done.
Cross-validation operates under the assumption that the groups are independent of each other, which may or may not be a safe assumption for your dataset. Spatial partitioning is one way to ensure more independence between groups.
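As a conceptual sketch (hypothetical data and placeholder fit/evaluate steps, not the session code itself), k-fold cross-validation does the following:

# conceptual sketch of 3-fold cross-validation on hypothetical data
grp <- rep(c("A", "B", "C"), length.out = 9)  # assign 9 records to 3 groups
for (g in unique(grp)) {
  train <- which(grp != g)  # build the model on the other two groups
  test  <- which(grp == g)  # evaluate that model on the withheld group
  # model <- fit(data[train, ]); score <- evaluate(model, data[test, ])
}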
You selected to partition your occurrence data by the random k-fold method (k = 2).
occs.xy <- occs[c('longitude', 'latitude')]
group.data <- ENMeval::get.randomkfold(occ = occs.xy, bg = bg.xy, kfolds = 2)
# pull out the occurrence and background partition group numbers from the list
occs.grp <- group.data[[1]]
bg.grp <- group.data[[2]]

You selected the maxent model.
# define the vector of regularization multipliers to test
rms <- seq(1, 2, 1)
# iterate model building over all chosen parameter settings
e <- ENMeval::ENMevaluate(occ = occs.xy, env = envsBgMsk, bg.coords = bg.xy,
RMvalues = rms, fc = 'L', method = 'user',
occ.grp = occs.grp, bg.grp = bg.grp,
                          clamp = TRUE, algorithm = "maxnet")
# unpack the results data frame, the list of models, and the RasterStack of raw predictions
evalTbl <- e@results
print(evalTbl)

## rm fc tune.args auc.train cbi.train auc.diff.avg auc.diff.sd auc.val.avg
## 1 1 L rm.1_fc.L 0.676344 0.702 0.1194944 0.1276380 0.5833239
## 2 2 L rm.2_fc.L 0.656218 0.860 0.1149306 0.1246831 0.5304630
## auc.val.sd cbi.val.avg cbi.val.sd or.10p.avg or.10p.sd or.mtp.avg or.mtp.sd
## 1 0.065968757 0.2155 0.2778930 0.2756410 0.15411302 0.2339744 0.2130386
## 2 0.009016291 -0.4935 0.1817264 0.3589744 0.03626189 0.2339744 0.2130386
## AICc delta.AICc w.AIC ncoef
## 1 623.427 6.806941 0.03218717 6
## 2 616.620 0.000000 0.96781283 3
evalMods <- e@models
names(evalMods) <- e@tune.settings$tune.args
evalPreds <- e@predictions
# view response curves for environmental variables with non-zero coefficients
plot(evalMods[["rm.2_fc.L"]], vars = c('bio05', 'bio07', 'bio19'), type = "cloglog")
# plot(evalMods[["rm.2_fc.L"]], vars = c('bio01', 'bio03', 'bio04', 'bio06', 'bio09', 'bio12', 'bio13', 'bio14', 'bio15', 'bio17', 'bio18', 'bio19'), type = "cloglog")
# view ENMeval results
ENMeval::evalplot.stats(e, stats = "delta.AICc", "rm", "fc")
# select your model from the models list
mod <- evalMods[["rm.2_fc.L"]]
# mod <- evalMods[["rm.1_fc.L"]]
# generate raw prediction
pred <- evalPreds[["rm.2_fc.L"]]
# plot the model prediction
plot(pred, main = "P. nasutus")
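The thresholding steps below call a thresh() helper that is defined neither in this document nor in the packages loaded above. A minimal sketch, assuming the standard Wallace definitions of the minimum training presence ('mtp') and 10th percentile training presence ('p10') thresholds:

# compute a threshold from the predicted values at the occurrence localities
thresh <- function(modOccVals, type) {
  if (type == 'mtp') {
    # minimum training presence: the lowest predicted value at any occurrence
    thr <- min(na.omit(modOccVals))
  } else if (type == 'p10') {
    # 10th percentile training presence: drop the lowest ~10% of occurrences
    if (length(modOccVals) < 10) {
      n90 <- floor(length(modOccVals) * 0.9)
    } else {
      n90 <- ceiling(length(modOccVals) * 0.9)
    }
    thr <- rev(sort(modOccVals))[n90]
  }
  return(thr)
}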
# generate logistic prediction
pred_logistic <- predictMaxnet(mod, envsBgMsk, type = 'logistic', clamp = TRUE)
# get predicted values for occurrence grid cells
occPredVals <- raster::extract(pred_logistic, occs.xy)
# define the 10th percentile training presence threshold
thr <- thresh(occPredVals, "p10")
# threshold model prediction
pred_logistic <- pred_logistic > thr
# plot the thresholded prediction
plot(pred_logistic, main = "P. nasutus")
# generate cloglog prediction
pred <- predictMaxnet(mod, envsBgMsk, type = 'cloglog', clamp = TRUE)
# get predicted values for occurrence grid cells
occPredVals <- raster::extract(pred, occs.xy)
# define the 10th percentile training presence threshold
thr <- thresh(occPredVals, "p10")
# threshold model prediction; keep the continuous prediction in pred,
# since it is reused to threshold the projection below
pred_bin <- pred > thr
# plot the thresholded prediction
plot(pred_bin)

You selected to project your model. First define a polygon with the coordinates you chose, then crop and mask your predictor rasters. Finally, predict suitability values for these new raster cells based on the model you selected.
projCoords <- data.frame(x = c(-77.8636, -81.7293, -82.0808, -77.5122, -78.9179, -47.6407, -31.4749, -27.7849, -32.3535, -58.7107, -76.2822, -77.8636), y = c(12.0393, 2.9869, -18.979, -39.2323, -47.2792, -46.195, -32.1012, -8.0592, 10.3149, 14.9448, 14.9448, 12.0393))
projPoly <- sp::SpatialPolygons(list(sp::Polygons(list(sp::Polygon(projCoords)), ID=1)))

Now download the future climate variables chosen with Wallace, crop and mask them by projPoly, and use the predictMaxnet() function to predict the values for the new time period based on the selected model.
envsFuture <- raster::getData("CMIP5", var = "bio", res = 5, rcp = 45, model = "AC", year = 50)
predsProj <- raster::crop(envsFuture, projPoly)
predsProj <- raster::mask(predsProj, projPoly)
# rename future climate variable names
names(predsProj) <- paste0('bio', sprintf("%02d", 1:19))
# select climate variables
predsProj <- raster::subset(predsProj, names(envs))
# predict model
proj <- predictMaxnet(mod, predsProj, type = 'cloglog', clamp = TRUE)
# get predicted values for occurrence grid cells (from the current-time continuous prediction)
occPredVals <- raster::extract(pred, occs.xy)
# define the 10th percentile training presence threshold
thr <- thresh(occPredVals, "p10")
# threshold model prediction
proj <- proj > thr
# plot the model prediction
plot(proj)

The same workflow is now repeated for a second species. The occurrence data for Promops centralis are read from a user-provided CSV file.
# NOTE: provide the path to the folder that contains the CSV file
d.occs <- 'D:/BoxFiles/Box Sync/CodigoR/Unillanos-Bat2019/data/'
# create path to user occurrences csv file
userOccs.path <- file.path(d.occs, "Puntos_Promops_centralis.csv")
# read in csv
userOccs.csv <- read.csv(userOccs.path, header = TRUE)
# remove rows with duplicate coordinates
occs.dups <- duplicated(userOccs.csv[c('longitude', 'latitude')])
occs <- userOccs.csv[!occs.dups,]
# remove NAs
occs <- occs[complete.cases(occs$longitude, occs$latitude), ]
# give all records a unique ID
occs$occID <- row.names(occs)

The following code recreates the polygon used to select the occurrences to keep in the analysis.
selCoords <- data.frame(x = c(-44.2146, -32.0024, -32.4417, -59.1504, -74.2618, -88.7583, -81.1147, -75.6676, -44.2146), y = c(-31.3536, -15.369, 0.2637, 11.3508, 13.8381, -3.6889, -23.2413, -30.1451, -31.3536))
selPoly <- sp::SpatialPolygons(list(sp::Polygons(list(sp::Polygon(selCoords)), ID=1)))
occs.xy <- occs[c('longitude', 'latitude')]
sp::coordinates(occs.xy) <- ~ longitude + latitude
intersect <- sp::over(occs.xy, selPoly)
intersect.rowNums <- as.numeric(which(!(is.na(intersect))))
occs <- occs[intersect.rowNums, ]

Environmental predictors come from the WorldClim (http://www.worldclim.org/) bioclimatic dataset at a resolution of 5 arcmin.
# get WorldClim bioclimatic variable rasters
# (the lat and lon arguments are only required for the 0.5 arcmin tiles)
envs <- raster::getData(name = "worldclim", var = "bio", res = 5)
# rename the raster variables
envRes <- 5
if (envRes == 0.5) {
i <- grep('_', names(envs))
editNames <- sapply(strsplit(names(envs)[i], '_'), function(x) x[1])
names(envs)[i] <- editNames
}
i <- grep('bio[0-9]$', names(envs))
editNames <- paste('bio', sapply(strsplit(names(envs)[i], 'bio'), function(x) x[2]), sep='0')
names(envs)[i] <- editNames
# subset by those variables selected
envs <- envs[[c('bio01', 'bio02', 'bio03', 'bio04', 'bio05', 'bio06', 'bio07', 'bio08', 'bio09', 'bio10', 'bio11', 'bio12', 'bio13', 'bio14', 'bio15', 'bio16', 'bio17', 'bio18', 'bio19')]]
# extract environmental values at occ grid cells
locs.vals <- raster::extract(envs[[1]], occs[, c('longitude', 'latitude')])
# remove occs without environmental values
occs <- occs[!is.na(locs.vals), ]

The background selection technique chosen was Bounding Box.
xmin <- min(occs$longitude)
xmax <- max(occs$longitude)
ymin <- min(occs$latitude)
ymax <- max(occs$latitude)
bb <- matrix(c(xmin, xmin, xmax, xmax, xmin, ymin, ymax, ymax, ymin, ymin), ncol=2)
bgExt <- sp::SpatialPolygons(list(sp::Polygons(list(sp::Polygon(bb)), 1)))

The buffer size of the study extent polygon was defined as 18 degrees.
bgExt <- rgeos::gBuffer(bgExt, width = 18)

Mask the environmental variables by the buffered bounding box and take a random sample of background values from the study extent. As the sample is random, your results may differ from those in the session. If there seems to be too much variability in these background samples, try increasing the number from 10,000 to something higher (e.g. 50,000 or 100,000). The better your background sample, the less variability you'll have between runs.
# crop the environmental rasters by the background extent shape
envsBgCrop <- raster::crop(envs, bgExt)
# mask the background extent shape from the cropped raster
envsBgMsk <- raster::mask(envsBgCrop, bgExt)
# sample random background points
bg.xy <- dismo::randomPoints(envsBgMsk, 10000)
# convert matrix output to data frame
bg.xy <- as.data.frame(bg.xy)
colnames(bg.xy) <- c("longitude", "latitude")

The occurrence data are now partitioned for cross-validation, a method that iteratively builds a model on all but one group and evaluates that model on the left-out group.
For example, if the data is partitioned into 3 groups A, B, and C, a model is first built with groups A and B and is evaluated on C. This is repeated by building a model with B and C and evaluating on A, and so on until all combinations are done.
Cross-validation operates under the assumption that the groups are independent of each other, which may or may not be a safe assumption for your dataset. Spatial partitioning is one way to ensure more independence between groups.
You selected to partition your occurrence data by the random k-fold method (k = 2).
occs.xy <- occs[c('longitude', 'latitude')]
group.data <- ENMeval::get.randomkfold(occ = occs.xy, bg = bg.xy, kfolds = 2)
# pull out the occurrence and background partition group numbers from the list
occs.grp <- group.data[[1]]
bg.grp <- group.data[[2]]

You selected the maxent model.
# define the vector of regularization multipliers to test
rms <- seq(1, 2, 1)
# iterate model building over all chosen parameter settings
e <- ENMeval::ENMevaluate(occ = occs.xy, env = envsBgMsk, bg.coords = bg.xy,
RMvalues = rms, fc = 'L', method = 'user',
occ.grp = occs.grp, bg.grp = bg.grp,
                          clamp = TRUE, algorithm = "maxnet")
# unpack the results data frame, the list of models, and the RasterStack of raw predictions
evalTbl <- e@results
print(evalTbl)

## rm fc tune.args auc.train cbi.train auc.diff.avg auc.diff.sd auc.val.avg
## 1 1 L rm.1_fc.L 0.8700410 0.923 0.10298452 0.05856023 0.6468286
## 2 2 L rm.2_fc.L 0.8017692 0.632 0.07014762 0.08706841 0.6592429
## auc.val.sd cbi.val.avg cbi.val.sd or.10p.avg or.10p.sd or.mtp.avg
## 1 0.002033774 0.364 0.3450681 0.1785714 0.01683588 0.10317460
## 2 0.009980307 0.717 0.1385929 0.1785714 0.01683588 0.04761905
## or.mtp.sd AICc delta.AICc w.AIC ncoef
## 1 0.01122392 917.7179 0.00000 0.9995801496 12
## 2 0.06734350 933.2683 15.55038 0.0004198504 10
evalMods <- e@models
names(evalMods) <- e@tune.settings$tune.args
evalPreds <- e@predictions
# view response curves for environmental variables with non-zero coefficients
plot(evalMods[["rm.2_fc.L"]], vars = c('bio05', 'bio07', 'bio19'), type = "cloglog")
# plot(evalMods[["rm.2_fc.L"]], vars = c('bio01', 'bio03', 'bio04', 'bio06', 'bio09', 'bio12', 'bio13', 'bio14', 'bio15', 'bio17', 'bio18', 'bio19'), type = "cloglog")
# view ENMeval results
ENMeval::evalplot.stats(e, stats = "delta.AICc", "rm", "fc")
# select your model from the models list
mod <- evalMods[["rm.2_fc.L"]]
# mod <- evalMods[["rm.1_fc.L"]]
# generate raw prediction
pred <- evalPreds[["rm.2_fc.L"]]
# plot the model prediction
plot(pred, main = "P. centralis")
# generate logistic prediction
pred_logistic <- predictMaxnet(mod, envsBgMsk, type = 'logistic', clamp = TRUE)
# get predicted values for occurrence grid cells
occPredVals <- raster::extract(pred_logistic, occs.xy)
# define the 10th percentile training presence threshold
thr <- thresh(occPredVals, "p10")
# threshold model prediction
pred_logistic <- pred_logistic > thr
# plot the thresholded prediction
plot(pred_logistic, main = "P. centralis")
# generate cloglog prediction
pred <- predictMaxnet(mod, envsBgMsk, type = 'cloglog', clamp = TRUE)
# get predicted values for occurrence grid cells
occPredVals <- raster::extract(pred, occs.xy)
# define the 10th percentile training presence threshold
thr <- thresh(occPredVals, "p10")
# threshold model prediction; keep the continuous prediction in pred,
# since it is reused to threshold the projection below
pred_bin <- pred > thr
# plot the thresholded prediction
plot(pred_bin)

You selected to project your model. First define a polygon with the coordinates you chose, then crop and mask your predictor rasters. Finally, predict suitability values for these new raster cells based on the model you selected.
projCoords <- data.frame(x = c(-77.8636, -81.7293, -82.0808, -77.5122, -78.9179, -47.6407, -31.4749, -27.7849, -32.3535, -58.7107, -76.2822, -77.8636), y = c(12.0393, 2.9869, -18.979, -39.2323, -47.2792, -46.195, -32.1012, -8.0592, 10.3149, 14.9448, 14.9448, 12.0393))
projPoly <- sp::SpatialPolygons(list(sp::Polygons(list(sp::Polygon(projCoords)), ID=1)))

Now download the future climate variables chosen with Wallace, crop and mask them by projPoly, and use the predictMaxnet() function to predict the values for the new time period based on the selected model.
envsFuture <- raster::getData("CMIP5", var = "bio", res = 5, rcp = 45, model = "AC", year = 50)
predsProj <- raster::crop(envsFuture, projPoly)
predsProj <- raster::mask(predsProj, projPoly)
# rename future climate variable names
names(predsProj) <- paste0('bio', sprintf("%02d", 1:19))
# select climate variables
predsProj <- raster::subset(predsProj, names(envs))
# predict model
proj <- predictMaxnet(mod, predsProj, type = 'cloglog', clamp = TRUE)
# get predicted values for occurrence grid cells (from the current-time continuous prediction)
occPredVals <- raster::extract(pred, occs.xy)
# define the 10th percentile training presence threshold
thr <- thresh(occPredVals, "p10")
# threshold model prediction
proj <- proj > thr
# plot the model prediction
plot(proj)
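Because the P. centralis workflow reuses the object names from the P. nasutus run (occs, pred, proj, and so on), the first species' results are overwritten in memory. To compare the two projections afterwards, save each one to disk right after it is produced. A minimal sketch (an addition to the session; the filename is hypothetical):

# save the thresholded projection as a GeoTIFF before the objects are reused
raster::writeRaster(proj, filename = "Promops_centralis_projection.tif",
                    format = "GTiff", overwrite = TRUE)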