# install.packages('sf')
# install.packages('RStoolbox')
# install.packages('maptools')
# install.packages("devtools")
# install_github("narayanibarve/ENMGadgets")
# install.packages("ncdf4")
library(devtools)
library(ENMGadgets)
require(ENMGadgets)
library(rgeos)
library(rgdal)
library(sf) # This package allows you to import shapefiles
library(RStoolbox) #This package allows you to perform PCA on rasters or raster stacks
library(ggplot2)
library(reshape2)
library(maptools)
library(readr)
library(usdm)
library(spThin)
library(dplyr)
library(dismo)
library(maxnet)
library(mapview)
library(MASS) # for 2D kernel density function
library(magrittr) # for piping functionality, i.e., %>%
library(maptools) # reading shapefiles
library(ncdf4)
library(raster)
require(raster)
library(brainGraph)
library(corrplot)
library(rmaxent)
library(ENMeval)
library(rnaturalearth)
library(rnaturalearthdata)
library(rnaturalearthhires)
Here we are using 2.5 arc minute resolution (not exactly sure what that translates to in metric area)
bio1 <- raster("WC_2_2.5/wc2.1_2.5m_bio_1.tif")
bio2 <- raster("WC_2_2.5/wc2.1_2.5m_bio_2.tif")
bio3 <- raster("WC_2_2.5/wc2.1_2.5m_bio_3.tif")
bio4 <- raster("WC_2_2.5/wc2.1_2.5m_bio_4.tif")
bio5 <- raster("WC_2_2.5/wc2.1_2.5m_bio_5.tif")
bio6 <- raster("WC_2_2.5/wc2.1_2.5m_bio_6.tif")
bio7 <- raster("WC_2_2.5/wc2.1_2.5m_bio_7.tif")
bio8 <- raster("WC_2_2.5/wc2.1_2.5m_bio_8.tif")
bio9 <- raster("WC_2_2.5/wc2.1_2.5m_bio_9.tif")
bio10 <- raster("WC_2_2.5/wc2.1_2.5m_bio_10.tif")
bio11 <- raster("WC_2_2.5/wc2.1_2.5m_bio_11.tif")
bio12 <- raster("WC_2_2.5/wc2.1_2.5m_bio_12.tif")
bio13 <- raster("WC_2_2.5/wc2.1_2.5m_bio_13.tif")
bio14 <- raster("WC_2_2.5/wc2.1_2.5m_bio_14.tif")
bio15 <- raster("WC_2_2.5/wc2.1_2.5m_bio_15.tif")
bio16 <- raster("WC_2_2.5/wc2.1_2.5m_bio_16.tif")
bio17 <- raster("WC_2_2.5/wc2.1_2.5m_bio_17.tif")
bio18 <- raster("WC_2_2.5/wc2.1_2.5m_bio_18.tif")
bio19 <- raster("WC_2_2.5/wc2.1_2.5m_bio_19.tif")
bioclim2.5 <- stack(bio1, bio2 ,bio3, bio4, bio5,
bio6, bio7, bio8, bio9, bio10,
bio11, bio12, bio13, bio14,
bio15, bio16, bio17, bio18,
bio19)
names(bioclim2.5) <- c('bio01', 'bio02', 'bio03', 'bio04',
'bio05', 'bio06', 'bio07', 'bio08',
'bio09', 'bio10', 'bio11', 'bio12',
'bio13', 'bio14', 'bio15', 'bio16',
'bio17', 'bio18', 'bio19')
crs(bioclim2.5) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
These presence data have already been cleaned
x_trop_pres_all <- read.csv('data/clean_x_trop_NEW.csv')# This is the full dataframe (we will only use this for creating the accessible area)
x_trop_pres_thin_2.5 <- read.csv('data/x_tropicalis_2.5km_thin1_new.csv')# This is the dataframe we will use (thinned by 2.5 km)
african_anurans <- read.csv('data/clean_all_anurans.csv') #This is a dataframe of points (within our area of interest) for all African anurans.
Because we are using the native range to generate background data, we need to use the native range to ‘clip’ the bioclim raster. This is easy to do by creating an account in the IUCN redlist, and then just downloading a shapefile of the geographic range. Then I saved the shapefile in the working directory, and this command
x_trop_native_range_shapefile <- shapefile('data/x_trop_sf.shp')
projection(bioclim2.5)
## [1] "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
projection(x_trop_native_range_shapefile)
## [1] "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
crs(x_trop_native_range_shapefile) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
Because all three are in different projections, I have to reproject them so that they match
north_am_map <- ne_countries(scale = "large", continent = 'north america')
africa_map <- ne_countries(scale = 'large', continent = 'africa')
plot(africa_map)
##Matching projections first
crs(africa_map) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
projection(north_am_map)
## [1] "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
us_map <- ne_states(country = 'United States of America')
FL_map <- us_map[us_map$name == 'Florida',]
crs(FL_map) <- crs(bioclim2.5)
US_contig <- us_map[us_map$name != 'Alaska',]
US_contig <- us_map[us_map$name != 'Hawaii' & us_map$name != 'Alaska',]
crs(US_contig) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
x_trop_pres_all <- data.frame(x_trop_pres_all)
coordinates(x_trop_pres_all) <- ~ decimalLongitude + decimalLatitude
crs(x_trop_pres_all) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
x_trop_native_range__buffer_small = buffer(x_trop_pres_all, width = 250000, dissolve = T)
plot(x_trop_native_range__buffer_small) #But this extends into the ocean now
x_trop_native_range__buffer_no_water <- gIntersection(x_trop_native_range__buffer_small, africa_map, byid=FALSE) # This clips the buffer so that it does not extends into the ocean
plot(x_trop_native_range__buffer_no_water)
Here I am cropping the the bioclim raster stack based on the native range shapefile I imported above. Then I am plotting all the bioclim variables to demonstrate they are clipped in the extent of the imported shapefile
crs(bioclim2.5) <- '+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0'
NR_bioclim2.5 <- crop(bioclim2.5, extent(x_trop_native_range__buffer_no_water))
NR_bioclim2.5 <- mask(NR_bioclim2.5, x_trop_native_range__buffer_no_water)
plot(x_trop_native_range__buffer_no_water)
FL_bioclim2.5 <- crop(bioclim2.5, extent(FL_map))
FL_bioclim2.5 <- mask(FL_bioclim2.5, FL_map)
plot(FL_bioclim2.5)
NR_bioclim2.5_cor_df <- na.omit(as.matrix(NR_bioclim2.5))
names(NR_bioclim2.5) <- c('bio01', 'bio02', 'bio03', 'bio04',
'bio05', 'bio06', 'bio07', 'bio08',
'bio09', 'bio10', 'bio11', 'bio12',
'bio13', 'bio14', 'bio15', 'bio16',
'bio17', 'bio18', 'bio19')
res <- cor(NR_bioclim2.5_cor_df)
round(res, 3)
## bio01 bio02 bio03 bio04 bio05 bio06 bio07 bio08 bio09 bio10
## bio01 1.000 0.179 -0.393 0.468 0.685 0.177 0.276 0.838 0.649 0.900
## bio02 0.179 1.000 -0.621 0.664 0.802 -0.888 0.957 0.161 -0.417 0.448
## bio03 -0.393 -0.621 1.000 -0.849 -0.799 0.637 -0.810 -0.277 0.237 -0.710
## bio04 0.468 0.664 -0.849 1.000 0.836 -0.631 0.826 0.484 -0.277 0.793
## bio05 0.685 0.802 -0.799 0.836 1.000 -0.565 0.879 0.552 0.023 0.880
## bio06 0.177 -0.888 0.637 -0.631 -0.565 1.000 -0.890 0.099 0.741 -0.192
## bio07 0.276 0.957 -0.810 0.826 0.879 -0.890 1.000 0.247 -0.416 0.597
## bio08 0.838 0.161 -0.277 0.484 0.552 0.099 0.247 1.000 0.450 0.754
## bio09 0.649 -0.417 0.237 -0.277 0.023 0.741 -0.416 0.450 1.000 0.306
## bio10 0.900 0.448 -0.710 0.793 0.880 -0.192 0.597 0.754 0.306 1.000
## bio11 0.710 -0.244 0.117 -0.244 0.160 0.594 -0.255 0.436 0.864 0.392
## bio12 -0.404 -0.425 0.623 -0.694 -0.593 0.365 -0.538 -0.439 0.088 -0.607
## bio13 -0.214 -0.151 0.262 -0.393 -0.266 0.110 -0.211 -0.284 0.027 -0.320
## bio14 -0.320 -0.492 0.675 -0.510 -0.601 0.414 -0.571 -0.140 0.052 -0.501
## bio15 0.420 0.709 -0.785 0.865 0.786 -0.674 0.824 0.441 -0.267 0.692
## bio16 -0.243 -0.152 0.272 -0.429 -0.282 0.111 -0.220 -0.345 0.029 -0.352
## bio17 -0.344 -0.544 0.756 -0.590 -0.663 0.475 -0.641 -0.142 0.084 -0.558
## bio18 -0.526 -0.481 0.813 -0.748 -0.747 0.388 -0.637 -0.369 -0.029 -0.744
## bio19 -0.124 -0.440 0.481 -0.545 -0.415 0.494 -0.515 -0.308 0.343 -0.340
## bio11 bio12 bio13 bio14 bio15 bio16 bio17 bio18 bio19
## bio01 0.710 -0.404 -0.214 -0.320 0.420 -0.243 -0.344 -0.526 -0.124
## bio02 -0.244 -0.425 -0.151 -0.492 0.709 -0.152 -0.544 -0.481 -0.440
## bio03 0.117 0.623 0.262 0.675 -0.785 0.272 0.756 0.813 0.481
## bio04 -0.244 -0.694 -0.393 -0.510 0.865 -0.429 -0.590 -0.748 -0.545
## bio05 0.160 -0.593 -0.266 -0.601 0.786 -0.282 -0.663 -0.747 -0.415
## bio06 0.594 0.365 0.110 0.414 -0.674 0.111 0.475 0.388 0.494
## bio07 -0.255 -0.538 -0.211 -0.571 0.824 -0.220 -0.641 -0.637 -0.515
## bio08 0.436 -0.439 -0.284 -0.140 0.441 -0.345 -0.142 -0.369 -0.308
## bio09 0.864 0.088 0.027 0.052 -0.267 0.029 0.084 -0.029 0.343
## bio10 0.392 -0.607 -0.320 -0.501 0.692 -0.352 -0.558 -0.744 -0.340
## bio11 1.000 0.094 0.123 -0.049 -0.177 0.127 -0.026 -0.058 0.272
## bio12 0.094 1.000 0.840 0.579 -0.552 0.872 0.614 0.743 0.744
## bio13 0.123 0.840 1.000 0.279 -0.118 0.982 0.280 0.405 0.598
## bio14 -0.049 0.579 0.279 1.000 -0.565 0.244 0.975 0.731 0.291
## bio15 -0.177 -0.552 -0.118 -0.565 1.000 -0.161 -0.651 -0.721 -0.455
## bio16 0.127 0.872 0.982 0.244 -0.161 1.000 0.250 0.425 0.646
## bio17 -0.026 0.614 0.280 0.975 -0.651 0.250 1.000 0.798 0.332
## bio18 -0.058 0.743 0.405 0.731 -0.721 0.425 0.798 1.000 0.462
## bio19 0.272 0.744 0.598 0.291 -0.455 0.646 0.332 0.462 1.000
res[ res > -0.75 & res < 0.75 ] = 0
corrplot(res, type = "upper", order = "alphabet",
tl.col = "black", tl.srt = 45, method = "number")
#Now I am just rerunning the model with the chosen variables
NR_bioclim2.5_cor_df <- na.omit(as.matrix(NR_bioclim2.5[[c(4,6,8,9,16,17),]]))
res <- cor(NR_bioclim2.5_cor_df)
round(res, 3)
## bio04 bio06 bio08 bio09 bio16 bio17
## bio04 1.000 -0.631 0.484 -0.277 -0.429 -0.590
## bio06 -0.631 1.000 0.099 0.741 0.111 0.475
## bio08 0.484 0.099 1.000 0.450 -0.345 -0.142
## bio09 -0.277 0.741 0.450 1.000 0.029 0.084
## bio16 -0.429 0.111 -0.345 0.029 1.000 0.250
## bio17 -0.590 0.475 -0.142 0.084 0.250 1.000
res[ res > -0.75 & res < 0.75 ] = 0
corrplot(res, type = "upper", order = "alphabet",
tl.col = "black", tl.srt = 45, method = "number")
# Creating the reduced stacks
NR_bioclim2.5_reduced <- NR_bioclim2.5[[c(4,6,8,9,16,17),]]
FL_bioclim2.5_reduced <- FL_bioclim2.5[[c(4,6,8,9,16,17),]]
#I am also going to add a raster I made that represents forest cover. It is a categorical raster where values of 1 are forest, and values of 0 are all other land category types.
FL_forest_resam_mask <- raster('forest_cover/FL_forest_resam_mask.tif')
NR_forest_resam_mask <- raster('forest_cover/NR_forest_resam_mask.tif')
crs(FL_forest_resam_mask)<- '+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0'
crs(NR_forest_resam_mask)<- '+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0'
# Adding the forest raster to the bioclim raster stack
NR_bioclim2.5_reduced <- NR_bioclim2.5[[c(4,6,8,9,16,17),]]
NR_bioclim2.5_reduced <- stack(NR_bioclim2.5_reduced, NR_forest_resam_mask)
names(NR_bioclim2.5_reduced[[7]]) <- 'forest'
FL_bioclim2.5_reduced <- FL_bioclim2.5[[c(4,6,8,9,16,17),]]
FL_bioclim2.5_reduced <- stack(FL_bioclim2.5_reduced, FL_forest_resam_mask)
names(FL_bioclim2.5_reduced[[7]]) <- 'forest'
#First we have to call this function, which will be used later to sample from the raster (this is what Rob was talking about, but I could not get the package to load properly)
sampleRast <- function(x, n, adjArea = TRUE, replace = TRUE, prob = TRUE) {
val <- as.vector(x[[1]])
# adjust probabilities for cell area and/or cell values
if (adjArea) {
areas <- raster::area(x[[1]], na.rm=TRUE)
areas <- as.vector(areas)
probs <- if (prob) {
val * areas
} else {
areas
}
} else if (!adjArea & prob) {
if (any(probs < 0)) warning('Some probabilities are < 0.', .immediate=TRUE)
probs <- val
} else if (!adjArea & !prob) {
probs <- rep(1, length(val))
}
cellNum <- 1:raster::ncell(x)
cellNum <- cellNum[!is.na(val)]
probs <- probs[!is.na(val)]
sites <- sample(cellNum, size=n, replace=replace, prob=probs)
xy <- raster::xyFromCell(x, sites)
xy
}
#bias layer function
biaslayer <- function(occs_df,longitude, latitude, raster_mold){
ll_ras <- raster::rasterize(occs_df[,c(longitude,latitude)],
raster_mold, 1)
ll_rasIDs <- which(raster::getValues(ll_ras) %in% 1)
occ_T <- sp::coordinates(ll_ras)[ll_rasIDs,]
dens <- MASS::kde2d(occ_T[,1], occ_T[,2],
n = c(nrow(ll_ras),
ncol(ll_ras)))
biasLayer <- raster::raster(dens)
if(!all(dim(biasLayer)[1:2] == dim(raster_mold)[1:2]))
biasLayer <- raster::resample(biasLayer,raster_mold)
return(biasLayer)
}
african_anurans <- data.frame(african_anurans)
coordinates(african_anurans) <- ~ decimalLongitude + decimalLatitude
crs(african_anurans) <- crs(NR_bioclim2.5)
african_anurans_crop <- crop(african_anurans, NR_bioclim2.5)
african_anurans_crop <- data.frame(african_anurans_crop)
anuran_bias_layer <- biaslayer(occs_df =african_anurans_crop,
longitude = 'decimalLongitude', latitude = 'decimalLatitude',
raster_mold = NR_bioclim2.5_reduced$bio04)
plot(anuran_bias_layer)
anuran_bias_layer<- crop(anuran_bias_layer, NR_bioclim2.5_reduced[[1]])
anuran_bias_layer<- mask(anuran_bias_layer, NR_bioclim2.5_reduced[[1]])
background_T<-sampleRast(anuran_bias_layer,2000,adjArea=TRUE, replace=FALSE,prob=TRUE) #sampling from the surrogate species bias layer to create a set of bias points
plot(anuran_bias_layer)
points(background_T)
#####As you can see, there are a greater number of these points in areas with higher density of Anuran occurrence
NR_bioclim2.5_df2 <- data.frame(rasterToPoints(NR_bioclim2.5_reduced))
mess1 <- mess(FL_bioclim2.5_reduced[[1:6]], NR_bioclim2.5_df2[3:8])
plot(mess1)
mess1[mess1<0]=-1
mess1[mess1>=0]=1
mess2 <- mess(FL_bioclim2.5_reduced[[2:6]], NR_bioclim2.5_df2[4:8])
plot(mess2)
mess1[mess1<0]=-1
mess1[mess1>=0]=1
#Converting the raster grids to a points dataframe
NR_bioclim2.5_reduced_df <-rasterToPoints(NR_bioclim2.5_reduced, spatial=TRUE)
NR_bioclim2.5_reduced_df <- data.frame(NR_bioclim2.5_reduced_df,
lon=coordinates(NR_bioclim2.5_reduced_df)[,1],
lat=coordinates(NR_bioclim2.5_reduced_df)[,2])
coordinates(NR_bioclim2.5_reduced_df) <- ~lon +lat
NR_bioclim2.5_reduced_df <- data.frame(NR_bioclim2.5_reduced_df)
## Generating 10000 points for background data
bg <- randomPoints(NR_bioclim2.5_reduced[[1]], n=10000)
bg <- as.data.frame(bg)
bg.grp <- rep(0, 2000) # generating a blank vector with a length of 2000 which will serve as the actual background points
#Here, we are using random k-fold validation to split the data into training/ testing. We are using 10 folds, meaning there will be 10 different models averaged together to determin test/ training AUC
random <- get.randomkfold(x_trop_pres_thin_2.5, bg, k=10)
coordinates(x_trop_pres_thin_2.5) <- ~ decimalLongitude + decimalLatitude
#Plot to see the results of the K-folds
plot(NR_bioclim2.5[[1]], legend=FALSE)
points(x_trop_pres_thin_2.5, pch=21, bg=random$occ.grp)
# The actual model
model_point_thin_kfold <- ENMevaluate(occ = coordinates(x_trop_pres_thin_2.5),
env=NR_bioclim2.5_reduced,
bg.coords =bg ,occ.grp = random$occ.grp ,
bg.grp = bg.grp, RMvalues =1,
fc=c('L','LQ','LP','LQP'), categoricals= 'forest', method = 'user',
algorithm = 'maxent.jar',clamp = TRUE,rasterPreds = TRUE,
parallel = TRUE,progbar = TRUE, updateProgress = TRUE)
## Loading required namespace: rJava
## *** Running ENMevaluate using Maxent 3.4.1 via dismo 1.1.4 ***
## Doing user-defined evaluation groups...
## There are 1 occurrence records with NA for at least
## one predictor variable. Removing these records from analysis,
## resulting in 140 records...
## Of 4 total cores using 4
## Running in parallel...
## ENMeval completed in 3 minutes 10.4 seconds.
#This will be a table with AICc, and corresponding AUCs for each model
model_point_thin_kfold@results
#Here, the model with the lowest AICc is model 3, which uses only linear and product responses
#Exploring the best model more in depth
aic.opt <- model_point_thin_kfold@models[[which(model_point_thin_kfold@results$delta.AICc==0)]]
aic.opt@results
## [,1]
## X.Training.samples 140.0000
## Regularized.training.gain 0.6444
## Unregularized.training.gain 0.7011
## Iterations 500.0000
## Training.AUC 0.8203
## X.Background.points 10000.0000
## bio04.contribution 49.2825
## bio06.contribution 6.8817
## bio08.contribution 7.6385
## bio09.contribution 23.1694
## bio16.contribution 3.7010
## bio17.contribution 8.4180
## forest.contribution 0.9088
## bio04.permutation.importance 69.3905
## bio06.permutation.importance 9.5559
## bio08.permutation.importance 2.5498
## bio09.permutation.importance 2.8316
## bio16.permutation.importance 5.7066
## bio17.permutation.importance 9.9656
## forest.permutation.importance 0.0000
## Entropy 8.5648
## Prevalence..average.probability.of.presence.over.background.sites. 0.3131
## Fixed.cumulative.value.1.cumulative.threshold 1.0000
## Fixed.cumulative.value.1.Cloglog.threshold 0.0501
## Fixed.cumulative.value.1.area 0.7824
## Fixed.cumulative.value.1.training.omission 0.0071
## Fixed.cumulative.value.5.cumulative.threshold 5.0000
## Fixed.cumulative.value.5.Cloglog.threshold 0.1489
## Fixed.cumulative.value.5.area 0.5696
## Fixed.cumulative.value.5.training.omission 0.0571
## Fixed.cumulative.value.10.cumulative.threshold 10.0000
## Fixed.cumulative.value.10.Cloglog.threshold 0.2543
## Fixed.cumulative.value.10.area 0.4486
## Fixed.cumulative.value.10.training.omission 0.0714
## Minimum.training.presence.cumulative.threshold 0.0509
## Minimum.training.presence.Cloglog.threshold 0.0110
## Minimum.training.presence.area 0.9659
## Minimum.training.presence.training.omission 0.0000
## X10.percentile.training.presence.cumulative.threshold 17.5063
## X10.percentile.training.presence.Cloglog.threshold 0.3934
## X10.percentile.training.presence.area 0.3468
## X10.percentile.training.presence.training.omission 0.1000
## Equal.training.sensitivity.and.specificity.cumulative.threshold 28.4949
## Equal.training.sensitivity.and.specificity.Cloglog.threshold 0.5468
## Equal.training.sensitivity.and.specificity.area 0.2571
## Equal.training.sensitivity.and.specificity.training.omission 0.2571
## Maximum.training.sensitivity.plus.specificity.cumulative.threshold 16.9019
## Maximum.training.sensitivity.plus.specificity.Cloglog.threshold 0.3811
## Maximum.training.sensitivity.plus.specificity.area 0.3533
## Maximum.training.sensitivity.plus.specificity.training.omission 0.0929
## Balance.training.omission..predicted.area.and.threshold.value.cumulative.threshold 1.2965
## Balance.training.omission..predicted.area.and.threshold.value.Cloglog.threshold 0.0602
## Balance.training.omission..predicted.area.and.threshold.value.area 0.7547
## Balance.training.omission..predicted.area.and.threshold.value.training.omission 0.0071
## Equate.entropy.of.thresholded.and.original.distributions.cumulative.threshold 6.5382
## Equate.entropy.of.thresholded.and.original.distributions.Cloglog.threshold 0.1780
## Equate.entropy.of.thresholded.and.original.distributions.area 0.5243
## Equate.entropy.of.thresholded.and.original.distributions.training.omission 0.0571
#Here we are looking at the importance of each variable
var.importance(aic.opt)
#These are the coefficients for each variable in the model. The interesting part here is that there appears to be a negative relationship with forest. I suspect this is because forested areas are probably less well-sampled. We should mention this in our talk
parse_lambdas(lambdas = model_point_thin_kfold@models[[4]])
##
## Features with non-zero weights
##
## feature lambda min max type
## (forest==0.0) 0.0180 0.000 1.000e+00 categorical
## (forest==1.0) -0.1795 0.000 1.000e+00 categorical
## bio04 -9.2947 57.808 4.373e+02 linear
## bio06 -8.4929 9.368 2.293e+01 linear
## bio09 -2.9338 17.677 3.009e+01 linear
## bio16 10.7476 93.000 2.691e+03 linear
## bio17 0.3265 0.000 4.740e+02 linear
## bio06^2 6.9108 87.759 5.259e+02 quadratic
## bio08^2 1.6133 232.339 8.948e+02 quadratic
## bio16^2 -1.3068 8649.000 7.241e+06 quadratic
## bio17^2 -3.4569 0.000 2.247e+05 quadratic
## bio04*bio06 6.9850 792.782 5.574e+03 product
## bio04*bio16 -12.3507 38392.383 4.719e+05 product
## bio04*bio17 6.2465 0.000 5.452e+04 product
## bio08*bio16 0.6736 2711.384 7.087e+04 product
## bio16*bio17 -2.9645 0.000 6.494e+05 product
##
##
## Features with zero weights
##
## feature lambda min max type
## bio08 0 15.24 29.91 linear
#Lastly, the response curves for the best model
response(model_point_thin_kfold@models[[3]])
#Predicting relative probability over the accessible area
NR_point_thin_pred <- (predict(model_point_thin_kfold@models[[3]],NR_bioclim2.5_reduced))
plot(NR_point_thin_pred)
#Predicting over Florida
FL_point_thin_pred <- (predict(model_point_thin_kfold@models[[3]],FL_bioclim2.5_reduced))
plot(FL_point_thin_pred)
#Setting the threshold for presence absence to 5%
pvtest <- data.frame(raster::extract(NR_point_thin_pred, coordinates(x_trop_pres_thin_2.5)))
pvtest_na <- na.omit(pvtest)
colnames(pvtest_na) <- 'probability'
quantile(pvtest_na$probability, 0.05)
## 5%
## 0.146155
FL_point_thin_pred_thresholded <- FL_point_thin_pred
FL_point_thin_pred_thresholded[FL_point_thin_pred_thresholded <= quantile(pvtest_na$probability, 0.05) ] = 0
FL_point_thin_pred_thresholded[FL_point_thin_pred_thresholded >quantile(pvtest_na$probability, 0.05) ] = 1
#Plotting the thresholded model
plot(FL_point_thin_pred_thresholded)
#Setting the threshold for presence absence to 10%
pvtest <- data.frame(raster::extract(NR_point_thin_pred, coordinates(x_trop_pres_thin_2.5)))
pvtest_na <- na.omit(pvtest)
colnames(pvtest_na) <- 'probability'
quantile(pvtest_na$probability, 0.1)
## 10%
## 0.392179
FL_point_thin_pred_thresholded <- FL_point_thin_pred
FL_point_thin_pred_thresholded[FL_point_thin_pred_thresholded <= quantile(pvtest_na$probability, 0.1) ] = 0
FL_point_thin_pred_thresholded[FL_point_thin_pred_thresholded >quantile(pvtest_na$probability, 0.1) ] = 1
#Plotting the thresholded model
plot(FL_point_thin_pred_thresholded)
#Just creating an empty vector with the length of the bias file points we made earlier
bg.grp2 <- rep(0, nrow(background_T))
model_bias_layer_kfold <- ENMevaluate(occ = coordinates(x_trop_pres_thin_2.5),
env=NR_bioclim2.5_reduced,
bg.coords =background_T ,occ.grp = random$occ.grp ,
bg.grp = bg.grp2, RMvalues =1,
fc=c('L','LQ','LP','LQP'),categoricals= 'forest', method = 'user',
algorithm = 'maxent.jar',clamp = TRUE,rasterPreds = TRUE,
parallel = TRUE,progbar = TRUE, updateProgress = TRUE)
## *** Running ENMevaluate using Maxent 3.4.1 via dismo 1.1.4 ***
## Doing user-defined evaluation groups...
## There are 1 occurrence records with NA for at least
## one predictor variable. Removing these records from analysis,
## resulting in 140 records...
## Of 4 total cores using 4
## Running in parallel...
## ENMeval completed in 1 minutes 39.4 seconds.
#This will be a table with AICc, and corresponding AUCs for each model
model_bias_layer_kfold@results
#Here, the model with the lowest AICc is model 2, which uses only linear and quadratic responses
#Exploring the best model more in depth
aic.opt <- model_bias_layer_kfold@models[[which(model_bias_layer_kfold@results$delta.AICc==0)]]
aic.opt@results
## [,1]
## X.Training.samples 140.0000
## Regularized.training.gain 0.3289
## Unregularized.training.gain 0.3922
## Iterations 500.0000
## Training.AUC 0.7393
## X.Background.points 2000.0000
## bio04.contribution 20.2926
## bio06.contribution 14.4640
## bio08.contribution 24.4935
## bio09.contribution 5.2310
## bio16.contribution 11.4872
## bio17.contribution 19.7694
## forest.contribution 4.2623
## bio04.permutation.importance 16.0417
## bio06.permutation.importance 6.6748
## bio08.permutation.importance 18.5616
## bio09.permutation.importance 22.3335
## bio16.permutation.importance 7.1537
## bio17.permutation.importance 26.4045
## forest.permutation.importance 2.8302
## Entropy 7.2709
## Prevalence..average.probability.of.presence.over.background.sites. 0.4368
## Fixed.cumulative.value.1.cumulative.threshold 1.0000
## Fixed.cumulative.value.1.Cloglog.threshold 0.1496
## Fixed.cumulative.value.1.area 0.9440
## Fixed.cumulative.value.1.training.omission 0.0000
## Fixed.cumulative.value.5.cumulative.threshold 5.0000
## Fixed.cumulative.value.5.Cloglog.threshold 0.2200
## Fixed.cumulative.value.5.area 0.8025
## Fixed.cumulative.value.5.training.omission 0.0286
## Fixed.cumulative.value.10.cumulative.threshold 10.0000
## Fixed.cumulative.value.10.Cloglog.threshold 0.2974
## Fixed.cumulative.value.10.area 0.6820
## Fixed.cumulative.value.10.training.omission 0.0714
## Minimum.training.presence.cumulative.threshold 1.0352
## Minimum.training.presence.Cloglog.threshold 0.1501
## Minimum.training.presence.area 0.9425
## Minimum.training.presence.training.omission 0.0000
## X10.percentile.training.presence.cumulative.threshold 11.6077
## X10.percentile.training.presence.Cloglog.threshold 0.3156
## X10.percentile.training.presence.area 0.6505
## X10.percentile.training.presence.training.omission 0.1000
## Equal.training.sensitivity.and.specificity.cumulative.threshold 36.1898
## Equal.training.sensitivity.and.specificity.Cloglog.threshold 0.5268
## Equal.training.sensitivity.and.specificity.area 0.3215
## Equal.training.sensitivity.and.specificity.training.omission 0.3214
## Maximum.training.sensitivity.plus.specificity.cumulative.threshold 45.3326
## Maximum.training.sensitivity.plus.specificity.Cloglog.threshold 0.6017
## Maximum.training.sensitivity.plus.specificity.area 0.2420
## Maximum.training.sensitivity.plus.specificity.training.omission 0.3714
## Balance.training.omission..predicted.area.and.threshold.value.cumulative.threshold 1.0352
## Balance.training.omission..predicted.area.and.threshold.value.Cloglog.threshold 0.1501
## Balance.training.omission..predicted.area.and.threshold.value.area 0.9425
## Balance.training.omission..predicted.area.and.threshold.value.training.omission 0.0000
## Equate.entropy.of.thresholded.and.original.distributions.cumulative.threshold 8.3077
## Equate.entropy.of.thresholded.and.original.distributions.Cloglog.threshold 0.2707
## Equate.entropy.of.thresholded.and.original.distributions.area 0.7185
## Equate.entropy.of.thresholded.and.original.distributions.training.omission 0.0571
#Here we are looking at the importance of each variable
var.importance(aic.opt)
#These are the coefficients for each variable in the model. The interesting part here is that there appears to be a negative relationship with forest. I suspect this is because forested areas are probably less well-sampled. We should mention this in our talk
parse_lambdas(lambdas = model_bias_layer_kfold@models[[2]])
##
## Features with non-zero weights
##
## feature lambda min max type
## (forest==0.0) 0.18705 0.000 1.000e+00 categorical
## (forest==1.0) -0.09713 0.000 1.000e+00 categorical
## bio04 -4.88232 59.602 4.171e+02 linear
## bio06 -1.75674 9.536 2.284e+01 linear
## bio08 6.27506 14.767 2.945e+01 linear
## bio09 -1.06353 17.345 2.926e+01 linear
## bio16 0.19720 130.000 2.618e+03 linear
## bio17 2.69677 0.000 4.550e+02 linear
## bio09^2 -1.73788 300.837 8.560e+02 quadratic
## bio16^2 2.02128 16900.000 6.854e+06 quadratic
## bio17^2 -0.16032 0.000 2.070e+05 quadratic
#Lastly, the response curves for the best model
response(model_bias_layer_kfold@models[[2]])
#Predicting relative probability over the accessible area
NR_bias_pred <- (predict(model_bias_layer_kfold@models[[2]],NR_bioclim2.5_reduced))
plot(NR_bias_pred)
#Predicting over Florida
FL_NR_bias_pred<- (predict(model_bias_layer_kfold@models[[2]],FL_bioclim2.5_reduced))
plot(FL_NR_bias_pred)
#Setting the threshold for presence absence to 5%
pvtest <- data.frame(raster::extract(NR_bias_pred, coordinates(x_trop_pres_thin_2.5)))
pvtest_na <- na.omit(pvtest)
colnames(pvtest_na) <- 'probability'
quantile(pvtest_na$probability, 0.05)
## 5%
## 0.2800685
FL_NR_bias_pred_thresholded <- FL_NR_bias_pred
FL_NR_bias_pred_thresholded[FL_NR_bias_pred_thresholded <= quantile(pvtest_na$probability, 0.05) ] = 0
FL_NR_bias_pred_thresholded[FL_NR_bias_pred_thresholded >quantile(pvtest_na$probability, 0.05) ] = 1
#Plotting the thresholded model
plot(FL_NR_bias_pred_thresholded)
#Setting the threshold for presence absence to 10%
pvtest <- data.frame(raster::extract(NR_bias_pred, coordinates(x_trop_pres_thin_2.5)))
pvtest_na <- na.omit(pvtest)
colnames(pvtest_na) <- 'probability'
quantile(pvtest_na$probability, 0.1)
## 10%
## 0.308446
FL_NR_bias_pred_thresholded <- FL_NR_bias_pred
FL_NR_bias_pred_thresholded[FL_NR_bias_pred_thresholded <= quantile(pvtest_na$probability, 0.1) ] = 0
FL_NR_bias_pred_thresholded[FL_NR_bias_pred_thresholded >quantile(pvtest_na$probability, 0.1) ] = 1
#Plotting the thresholded model
plot(FL_NR_bias_pred_thresholded)