Installing/ loading the packages

Importing presence data

Here I a am importing 2 different sets of data: the X. tropicalis presence points, along with occurrence data for all other African anurans. The X. tropicalis data has been thinned such that no 2 points occupy the same raster grid. The African anurans points will serve as a surrogate species bias layer. Because I do not have true absence data, I will preferentially draw background points from areas where other species are well-sampled. Data were obtained from GBIF and primary lit.

af_anurans <- read.csv('data/af_anurans.csv')
x_trop_all_cleaned <- read.csv('data/xenopus_raster_values.csv')

Variables being used

Here, I am using bioclimatic variables from WorldClim 2.0, along with Globcover landcover data. Since this species is associated with forest, I converted the landcover data to be categorical (i.e. forest vs not forest). All rasters used are at 2.5 minute resolution. I am constucting 5 different models, each using a subset of variables with correlations < 0.75. Variables were selected based on presumed biological importance.

Note on accessible area: The IUCN puts some uncertainty on the range. Therefore, I have elected to define the accessible area as being a 200 km radius around all occurrence points.

NR_stack_m1 <- stack(
  raster('M_variables/Set1/bio07.asc'),
  raster('M_variables/Set1/bio11.asc'),
  raster('M_variables/Set1/bio15.asc'),
  raster('M_variables/Set1/bio16.asc'),
  raster('M_variables/Set1/bio17.asc')
)
FL_stack_m1 <- stack(
  raster('G_variables/Set1/bio07.asc'),
  raster('G_variables/Set1/bio11.asc'),
  raster('G_variables/Set1/bio15.asc'),
  raster('G_variables/Set1/bio16.asc'),
  raster('G_variables/Set1/bio17.asc')
)

NR_stack_m2 <- stack(
  raster('M_variables/Set2/bio05.asc'),
  raster('M_variables/Set2/bio06.asc'),
  raster('M_variables/Set2/bio11.asc'),
  raster('M_variables/Set2/bio15.asc'),
  raster('M_variables/Set2/bio16.asc'),
  raster('M_variables/Set2/bio17.asc')
)
FL_stack_m2 <- stack(
  raster('G_variables/Set2/bio05.asc'),
  raster('G_variables/Set2/bio06.asc'),
  raster('G_variables/Set2/bio11.asc'),
  raster('G_variables/Set2/bio15.asc'),
  raster('G_variables/Set2/bio16.asc'),
  raster('G_variables/Set2/bio17.asc')
)

NR_stack_m3 <- stack(
  raster('M_variables/Set3/bio06.asc'),
  raster('M_variables/Set3/bio10.asc'),
  raster('M_variables/Set3/bio14.asc'),
  raster('M_variables/Set3/bio15.asc'),
  raster('M_variables/Set3/bio16.asc'),
  raster('M_variables/Set3/forest.tif')
)
FL_stack_m3 <- stack(
  raster('G_variables/Set3/bio06.asc'),
  raster('G_variables/Set3/bio10.asc'),
  raster('G_variables/Set3/bio14.asc'),
  raster('G_variables/Set3/bio15.asc'),
  raster('G_variables/Set3/bio16.asc'),
  raster('G_variables/Set3/forest.tif')
)

NR_stack_m4 <- stack(
  raster('M_variables/Set4/bio2.asc'),
  raster('M_variables/Set4/bio4.asc'),
  raster('M_variables/Set4/bio06.asc'),
  raster('M_variables/Set4/bio11.asc'),
  raster('M_variables/Set4/bio15.asc'),
  raster('M_variables/Set4/bio16.asc'),
  raster('M_variables/Set4/bio17.asc')
)
FL_stack_m4 <- stack(
  raster('G_variables/Set4/bio2.asc'),
  raster('G_variables/Set4/bio4.asc'),
  raster('G_variables/Set4/bio06.asc'),
  raster('G_variables/Set4/bio11.asc'),
  raster('G_variables/Set4/bio15.asc'),
  raster('G_variables/Set4/bio16.asc'),
  raster('G_variables/Set4/bio17.asc')
)

NR_stack_m5 <- stack(
  raster('M_variables/Set5/bio07.asc'),
  raster('M_variables/Set5/bio11.asc'),
  raster('M_variables/Set5/bio15.asc'),
  raster('M_variables/Set5/bio16.asc'),
  raster('M_variables/Set5/bio17.asc'),
  raster('M_variables/Set5/forest.tif')
)
FL_stack_m5 <- stack(
  raster('G_variables/Set5/bio07.asc'),
  raster('G_variables/Set5/bio11.asc'),
  raster('G_variables/Set5/bio15.asc'),
  raster('G_variables/Set5/bio16.asc'),
  raster('G_variables/Set5/bio17.asc'),
  raster('G_variables/Set5/forest.tif')
)

crs(NR_stack_m1) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
crs(FL_stack_m1) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
crs(NR_stack_m2) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
crs(FL_stack_m2) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
crs(NR_stack_m3) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
crs(FL_stack_m3) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
crs(NR_stack_m4) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
crs(FL_stack_m4) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
crs(NR_stack_m5) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
crs(FL_stack_m5) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"

Below is a key for all variables being used

BIO2 = Mean Diurnal Range (Mean of monthly (max temp - min temp))

BIO4 = Temperature Seasonality (standard deviation *100) ####BIO5 = Max Temperature of Warmest Month ####BIO6 = Min Temperature of Coldest Month ####BIO7 = Temperature Annual Range (BIO5-BIO6) ####BIO10 = Mean Temperature of Warmest Quarter ####BIO11 = Mean Temperature of Coldest Quarter ####BIO14 = Precipitation of Driest Month ####BIO15 = Precipitation Seasonality (Coefficient of Variation) ####BIO16 = Precipitation of Wettest Quarter ####BIO17 = Precipitation of Driest Quarter

Examining the plots

####Red points represent points for the surrogate species bias layer. Blue points represent X. tropicalis presence points

Creating the bias layer

To actually create the bias layer, I am using Gaussian kernel density. Essentially, this will allow me to preferentially select background points from areas that appear to be well-sampled for other species

# Creating bias layer
af_anurans <- data.frame(af_anurans)
anuran_bias_layer <- biaslayer(occs_df =af_anurans, 
                               longitude = 'decimalLongitude', latitude = 'decimalLatitude', 
                               raster_mold = NR_stack_m1[[1]])

anuran_bias_layer<- crop(anuran_bias_layer, NR_stack_m1[[1]])
anuran_bias_layer<- mask(anuran_bias_layer, NR_stack_m1[[1]])
coordinates(af_anurans) <- ~ decimalLongitude + decimalLatitude

plot(anuran_bias_layer)
points(af_anurans, pch=23, bg = 'orange', cex = 0.75, col='red')

background_T<-sampleRast(anuran_bias_layer,2000,adjArea=TRUE, replace=FALSE,prob=TRUE) #sampling from the surrogate species bias layer to create a set of background data points
bg.grp <- rep(0, 2000) # generating a blank vector with a length of 2000 which will serve as the actual background points

plot(anuran_bias_layer)
points(background_T, pch=23, bg = 'black', cex = 0.6, col='red')

Setting up the models

I am using the package ENMeval. This package–in turn–uses package Dismo, which runs Maxent through R

To split the data into training and testing, I am using a k-folds validation, where the data are randomly partitioned into 10 groups, and then each group is used as test data and training data. Then all statistics are averaged over these 10 runs.

x_trop_all_cleaned <- data.frame(x_trop_all_cleaned)
coordinates(x_trop_all_cleaned) <- ~ decimalLongitude + decimalLatitude
crs(x_trop_all_cleaned) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
random1 <- get.randomkfold(x_trop_all_cleaned, background_T, k=10)

plot(NR_stack_m1[[1]], legend=FALSE)
points(x_trop_all_cleaned, pch=21, bg=random1$occ.grp)

Model 1

Attributes of this model:

This is actually 12 different models. I am using 3 different beta multipliers (0.5,1,2), and then each of these is split up by using each of the following functions: linear only, linear product, linear quadraitc, and linear product quadratic

I will do this for each set of a priori variable, and only retain the model with the lowest AICc for further analysis (i.e the final set will be 5 models)

# The actual model
NR_stack_m1

## class      : RasterStack 
## dimensions : 310, 724, 224440, 5  (nrow, ncol, ncell, nlayers)
## resolution : 0.04166667, 0.04166667  (x, y)
## extent     : -17.54167, 12.625, 2.291667, 15.20833  (xmin, xmax, ymin, ymax)
## crs        : +proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0 
## names      : bio07, bio11, bio15, bio16, bio17

model_1 <- ENMevaluate(occ = coordinates(x_trop_all_cleaned),
                                      env=NR_stack_m1,
                                      bg.coords =background_T ,occ.grp = random1$occ.grp ,
                                      bg.grp = bg.grp, RMvalues =1, 
                                      fc=c('L','LQ','LP','LQP'), 
                                      method='user',
                                      algorithm = 'maxent.jar',clamp = TRUE,rasterPreds = TRUE,
                                      parallel = FALSE,progbar = TRUE, updateProgress = TRUE)

## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |======================================================================| 100%

Model 1 results

#This will be a table with AICc, and corresponding AUCs for each model
model_1@results

##   settings features rm train.AUC avg.test.AUC var.test.AUC avg.diff.AUC
## 1      L_1        L  1    0.6795    0.6640030   0.12119209   0.05628451
## 2     LQ_1       LQ  1    0.7017    0.6728146   0.11443877   0.06629353
## 3     LP_1       LP  1    0.7217    0.6999984   0.05593123   0.04416523
## 4    LQP_1      LQP  1    0.7232    0.6968577   0.07463082   0.05292211
##   var.diff.AUC avg.test.orMTP var.test.orMTP avg.test.or10pct var.test.or10pct
## 1   0.07797906    0.007142857   0.0005102041        0.1219780      0.013604100
## 2   0.07758412    0.014835165   0.0009798199        0.1153846      0.010512686
## 3   0.03826417    0.007142857   0.0005102041        0.1082418      0.009453032
## 4   0.05270947    0.014285714   0.0020408163        0.1010989      0.012815146
##       AICc delta.AICc        w.AIC parameters
## 1 3154.435  48.970574 1.621433e-11          4
## 2 3124.751  19.285718 4.527762e-05          9
## 3 3107.139   1.673881 3.021658e-01          9
## 4 3105.465   0.000000 6.977889e-01         12

#Now retaining the model with the lowest AICc
aic.opt1 <- model_1@models[[which(model_1@results$delta.AICc==0)]]
aic.opt1@results

##                                                                                         [,1]
## X.Training.samples                                                                  139.0000
## Regularized.training.gain                                                             0.3116
## Unregularized.training.gain                                                           0.3660
## Iterations                                                                          500.0000
## Training.AUC                                                                          0.7232
## X.Background.points                                                                2000.0000
## bio07.contribution                                                                   34.3752
## bio11.contribution                                                                    1.0160
## bio15.contribution                                                                   10.0825
## bio16.contribution                                                                   10.2449
## bio17.contribution                                                                   44.2815
## bio07.permutation.importance                                                         18.4485
## bio11.permutation.importance                                                          0.1033
## bio15.permutation.importance                                                         15.9146
## bio16.permutation.importance                                                          7.1395
## bio17.permutation.importance                                                         58.3941
## Entropy                                                                               7.2873
## Prevalence..average.probability.of.presence.over.background.sites.                    0.4444
## Fixed.cumulative.value.1.cumulative.threshold                                         1.0000
## Fixed.cumulative.value.1.Cloglog.threshold                                            0.2270
## Fixed.cumulative.value.1.area                                                         0.9700
## Fixed.cumulative.value.1.training.omission                                            0.0072
## Fixed.cumulative.value.5.cumulative.threshold                                         5.0000
## Fixed.cumulative.value.5.Cloglog.threshold                                            0.2556
## Fixed.cumulative.value.5.area                                                         0.8650
## Fixed.cumulative.value.5.training.omission                                            0.0647
## Fixed.cumulative.value.10.cumulative.threshold                                       10.0000
## Fixed.cumulative.value.10.Cloglog.threshold                                           0.2898
## Fixed.cumulative.value.10.area                                                        0.7490
## Fixed.cumulative.value.10.training.omission                                           0.0863
## Minimum.training.presence.cumulative.threshold                                        0.8095
## Minimum.training.presence.Cloglog.threshold                                           0.2219
## Minimum.training.presence.area                                                        0.9750
## Minimum.training.presence.training.omission                                           0.0000
## X10.percentile.training.presence.cumulative.threshold                                11.0883
## X10.percentile.training.presence.Cloglog.threshold                                    0.2990
## X10.percentile.training.presence.area                                                 0.7260
## X10.percentile.training.presence.training.omission                                    0.0935
## Equal.training.sensitivity.and.specificity.cumulative.threshold                      37.8350
## Equal.training.sensitivity.and.specificity.Cloglog.threshold                          0.4917
## Equal.training.sensitivity.and.specificity.area                                       0.3310
## Equal.training.sensitivity.and.specificity.training.omission                          0.3309
## Maximum.training.sensitivity.plus.specificity.cumulative.threshold                   38.1128
## Maximum.training.sensitivity.plus.specificity.Cloglog.threshold                       0.4944
## Maximum.training.sensitivity.plus.specificity.area                                    0.3280
## Maximum.training.sensitivity.plus.specificity.training.omission                       0.3309
## Balance.training.omission..predicted.area.and.threshold.value.cumulative.threshold    0.8095
## Balance.training.omission..predicted.area.and.threshold.value.Cloglog.threshold       0.2219
## Balance.training.omission..predicted.area.and.threshold.value.area                    0.9750
## Balance.training.omission..predicted.area.and.threshold.value.training.omission       0.0000
## Equate.entropy.of.thresholded.and.original.distributions.cumulative.threshold        10.8884
## Equate.entropy.of.thresholded.and.original.distributions.Cloglog.threshold            0.2969
## Equate.entropy.of.thresholded.and.original.distributions.area                         0.7305
## Equate.entropy.of.thresholded.and.original.distributions.training.omission            0.0935

#Here we are looking at the importance of each variable
var.importance(aic.opt1)

##   variable percent.contribution permutation.importance
## 1    bio07              34.3752                18.4485
## 2    bio11               1.0160                 0.1033
## 3    bio15              10.0825                15.9146
## 4    bio16              10.2449                 7.1395
## 5    bio17              44.2815                58.3941

#These are the coefficients for each variable in the model. 
parse_lambdas(aic.opt1)

## 
## Features with non-zero weights
## 
##      feature   lambda       min       max      type
##        bio07  5.66846     8.988     27.32    linear
##      bio11^2 -0.02592   265.973    778.73 quadratic
##      bio15^2  4.55393  1494.339  25711.19 quadratic
##      bio17^2 -4.24517     0.000 200704.00 quadratic
##  bio07*bio11 -0.23140   176.373    630.22   product
##  bio07*bio15 -5.28997   431.254   3374.55   product
##  bio07*bio16 -5.94034  4588.695  44963.61   product
##  bio07*bio17  6.62809     0.000   4997.89   product
##  bio11*bio17 -1.42323     0.000  10668.97   product
##  bio15*bio16  7.95963 23521.858 323752.78   product
##  bio15*bio17  7.76101     0.000  23168.02   product
##  bio16*bio17 -4.66997     0.000 570766.00   product
## 
## 
## Features with zero weights
## 
##  feature lambda    min     max   type
##    bio11      0  16.31   27.91 linear
##    bio15      0  38.66  160.35 linear
##    bio16      0 394.00 2405.00 linear
##    bio17      0   0.00  448.00 linear

#Lastly, the response curves for the best model
response(aic.opt1)

Model 1: predictions

x_trop_all_cleaned <- read.csv('data/xenopus_raster_values.csv')
coordinates(x_trop_all_cleaned) <- ~ decimalLongitude + decimalLatitude
crs(x_trop_all_cleaned) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"

#Predicting relative probability over the accessible area
NR_pred_1 <- (predict(aic.opt1,NR_stack_m1))
plot(NR_pred_1)

#Predicting over Florida
FL_pred_1 <- (predict(aic.opt1, FL_stack_m1))
plot(FL_pred_1)

#Setting the threshold for presence absence to 5%
m1_thresh <- data.frame(raster::extract(NR_pred_1, coordinates(x_trop_all_cleaned)))
m1_thresh <- na.omit(m1_thresh)
colnames(m1_thresh) <- 'probability'
quantile(m1_thresh$probability, 0.05)

##        5% 
## 0.2532359

FL_pred_1_thresholded <- FL_pred_1

FL_pred_1_thresholded[FL_pred_1_thresholded >= aic.opt1@results[44] ] = 3
FL_pred_1_thresholded[FL_pred_1_thresholded >= aic.opt1@results[36] & FL_pred_1_thresholded < aic.opt1@results[44] ] = 2
FL_pred_1_thresholded[FL_pred_1_thresholded >= quantile(m1_thresh$probability, 0.05) & FL_pred_1_thresholded < aic.opt1@results[36] ] = 1
FL_pred_1_thresholded[FL_pred_1_thresholded < quantile(m1_thresh$probability, 0.05) ] = 0

#Plotting the thresholded model
plot(FL_pred_1_thresholded)

Model 2

# The actual model
NR_stack_m2

## class      : RasterStack 
## dimensions : 310, 724, 224440, 6  (nrow, ncol, ncell, nlayers)
## resolution : 0.04166667, 0.04166667  (x, y)
## extent     : -17.54167, 12.625, 2.291667, 15.20833  (xmin, xmax, ymin, ymax)
## crs        : +proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0 
## names      : bio05, bio06, bio11, bio15, bio16, bio17

model_2 <- ENMevaluate(occ = coordinates(x_trop_all_cleaned),
                                      env=NR_stack_m2,
                                      bg.coords =background_T ,occ.grp = random1$occ.grp ,
                                      bg.grp = bg.grp, RMvalues =1, 
                                      fc=c('L','LQ','LP','LQP'), 
                                      method='user',
                                      algorithm = 'maxent.jar',clamp = TRUE,rasterPreds = TRUE,
                                      parallel = FALSE,progbar = TRUE, updateProgress = TRUE)

## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |======================================================================| 100%

Model 2 results

#This will be a table with AICc, and corresponding AUCs for each model
model_2@results

##   settings features rm train.AUC avg.test.AUC var.test.AUC avg.diff.AUC
## 1      L_1        L  1    0.6858    0.6693679   0.10437585   0.05336123
## 2     LQ_1       LQ  1    0.6991    0.6690569   0.10500705   0.06356853
## 3     LP_1       LP  1    0.7124    0.6877365   0.06151944   0.04856117
## 4    LQP_1      LQP  1    0.7187    0.6888835   0.07893354   0.05535377
##   var.diff.AUC avg.test.orMTP var.test.orMTP avg.test.or10pct var.test.or10pct
## 1   0.06954694    0.007142857   0.0005102041        0.1005495      0.006997612
## 2   0.07442220    0.014285714   0.0009070295        0.1153846      0.009378899
## 3   0.04085400    0.014285714   0.0009070295        0.1082418      0.008319245
## 4   0.05891174    0.007142857   0.0005102041        0.1153846      0.012780260
##       AICc delta.AICc        w.AIC parameters
## 1 3149.165  40.193184 1.522885e-09          5
## 2 3130.470  21.498968 1.746081e-05         10
## 3 3111.921   2.949689 1.862041e-01         10
## 4 3108.972   0.000000 8.137784e-01         13

#Now retaining the model with the lowest AICc
aic.opt2 <- model_2@models[[which(model_2@results$delta.AICc==0)]]
aic.opt2@results

##                                                                                         [,1]
## X.Training.samples                                                                  139.0000
## Regularized.training.gain                                                             0.2840
## Unregularized.training.gain                                                           0.3453
## Iterations                                                                          480.0000
## Training.AUC                                                                          0.7187
## X.Background.points                                                                2000.0000
## bio05.contribution                                                                    7.2353
## bio06.contribution                                                                    5.3422
## bio11.contribution                                                                    2.4328
## bio15.contribution                                                                   35.7333
## bio16.contribution                                                                    8.1681
## bio17.contribution                                                                   41.0883
## bio05.permutation.importance                                                         10.7961
## bio06.permutation.importance                                                         10.1055
## bio11.permutation.importance                                                          0.0000
## bio15.permutation.importance                                                         15.5904
## bio16.permutation.importance                                                          6.1583
## bio17.permutation.importance                                                         57.3498
## Entropy                                                                               7.3139
## Prevalence..average.probability.of.presence.over.background.sites.                    0.4581
## Fixed.cumulative.value.1.cumulative.threshold                                         1.0000
## Fixed.cumulative.value.1.Cloglog.threshold                                            0.2046
## Fixed.cumulative.value.1.area                                                         0.9655
## Fixed.cumulative.value.1.training.omission                                            0.0144
## Fixed.cumulative.value.5.cumulative.threshold                                         5.0000
## Fixed.cumulative.value.5.Cloglog.threshold                                            0.2543
## Fixed.cumulative.value.5.area                                                         0.8500
## Fixed.cumulative.value.5.training.omission                                            0.0432
## Fixed.cumulative.value.10.cumulative.threshold                                       10.0000
## Fixed.cumulative.value.10.Cloglog.threshold                                           0.3143
## Fixed.cumulative.value.10.area                                                        0.7385
## Fixed.cumulative.value.10.training.omission                                           0.0791
## Minimum.training.presence.cumulative.threshold                                        0.3663
## Minimum.training.presence.Cloglog.threshold                                           0.1909
## Minimum.training.presence.area                                                        0.9870
## Minimum.training.presence.training.omission                                           0.0000
## X10.percentile.training.presence.cumulative.threshold                                11.8489
## X10.percentile.training.presence.Cloglog.threshold                                    0.3272
## X10.percentile.training.presence.area                                                 0.7025
## X10.percentile.training.presence.training.omission                                    0.0935
## Equal.training.sensitivity.and.specificity.cumulative.threshold                      37.7324
## Equal.training.sensitivity.and.specificity.Cloglog.threshold                          0.5119
## Equal.training.sensitivity.and.specificity.area                                       0.3440
## Equal.training.sensitivity.and.specificity.training.omission                          0.3453
## Maximum.training.sensitivity.plus.specificity.cumulative.threshold                   35.1482
## Maximum.training.sensitivity.plus.specificity.Cloglog.threshold                       0.4959
## Maximum.training.sensitivity.plus.specificity.area                                    0.3715
## Maximum.training.sensitivity.plus.specificity.training.omission                       0.2806
## Balance.training.omission..predicted.area.and.threshold.value.cumulative.threshold    0.3663
## Balance.training.omission..predicted.area.and.threshold.value.Cloglog.threshold       0.1909
## Balance.training.omission..predicted.area.and.threshold.value.area                    0.9870
## Balance.training.omission..predicted.area.and.threshold.value.training.omission       0.0000
## Equate.entropy.of.thresholded.and.original.distributions.cumulative.threshold         9.4154
## Equate.entropy.of.thresholded.and.original.distributions.Cloglog.threshold            0.3095
## Equate.entropy.of.thresholded.and.original.distributions.area                         0.7505
## Equate.entropy.of.thresholded.and.original.distributions.training.omission            0.0791

#Here we are looking at the importance of each variable
var.importance(aic.opt2)

##   variable percent.contribution permutation.importance
## 1    bio05               7.2353                10.7961
## 2    bio06               5.3422                10.1055
## 3    bio11               2.4328                 0.0000
## 4    bio15              35.7333                15.5904
## 5    bio16               8.1681                 6.1583
## 6    bio17              41.0883                57.3498

#These are the coefficients for each variable in the model. 
parse_lambdas(aic.opt2)

## 
## Features with non-zero weights
## 
##      feature   lambda       min       max      type
##        bio05   4.0705 2.512e+01 3.969e+01    linear
##        bio06  -1.0789 9.772e+00 2.280e+01    linear
##      bio15^2   9.8691 1.494e+03 2.571e+04 quadratic
##      bio16^2   0.3041 1.552e+05 5.784e+06 quadratic
##      bio17^2  -1.1584 0.000e+00 2.007e+05 quadratic
##  bio05*bio15 -10.4067 1.203e+03 5.769e+03   product
##  bio05*bio16  -1.6088 1.356e+04 9.232e+04   product
##  bio05*bio17   4.1287 0.000e+00 1.394e+04   product
##  bio06*bio15   0.3095 7.402e+02 2.754e+03   product
##  bio06*bio17  -6.7361 0.000e+00 8.946e+03   product
##  bio15*bio16   3.9897 2.352e+04 3.238e+05   product
##  bio15*bio17  12.4560 0.000e+00 2.317e+04   product
##  bio16*bio17  -5.1683 0.000e+00 5.708e+05   product
## 
## 
## Features with zero weights
## 
##  feature lambda    min     max   type
##    bio11      0  16.31   27.91 linear
##    bio15      0  38.66  160.35 linear
##    bio16      0 394.00 2405.00 linear
##    bio17      0   0.00  448.00 linear

#Lastly, the response curves for the best model
response(aic.opt2)

Model 2: predictions

x_trop_all_cleaned <- read.csv('data/xenopus_raster_values.csv')
coordinates(x_trop_all_cleaned) <- ~ decimalLongitude + decimalLatitude
crs(x_trop_all_cleaned) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"

#Predicting relative probability over the accessible area
NR_pred_2 <- (predict(aic.opt2,NR_stack_m2))
plot(NR_pred_2)

#Predicting over Florida
FL_pred_2 <- (predict(aic.opt2, FL_stack_m2))
plot(FL_pred_2)

#Setting the threshold for presence absence to 5%
m2_thresh <- data.frame(raster::extract(NR_pred_2, coordinates(x_trop_all_cleaned)))
m2_thresh <- na.omit(m2_thresh)
colnames(m2_thresh) <- 'probability'
quantile(m2_thresh$probability, 0.05)

##        5% 
## 0.2668274

FL_pred_2_thresholded <- FL_pred_2
FL_pred_2_thresholded[FL_pred_2_thresholded >= aic.opt2@results[46] ] = 3
FL_pred_2_thresholded[FL_pred_2_thresholded >= aic.opt2@results[38] & FL_pred_2_thresholded < aic.opt2@results[46] ] = 2
FL_pred_2_thresholded[FL_pred_2_thresholded >= quantile(m2_thresh$probability, 0.05) & FL_pred_2_thresholded < aic.opt2@results[38] ] = 1
FL_pred_2_thresholded[FL_pred_2_thresholded < quantile(m2_thresh$probability, 0.05) ] = 0

#Plotting the thresholded model
plot(FL_pred_2_thresholded)

Model 3

# The actual model
NR_stack_m3

## class      : RasterStack 
## dimensions : 310, 724, 224440, 6  (nrow, ncol, ncell, nlayers)
## resolution : 0.04166667, 0.04166667  (x, y)
## extent     : -17.54167, 12.625, 2.291667, 15.20833  (xmin, xmax, ymin, ymax)
## crs        : +proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0 
## names      : bio06, bio10, bio14, bio15, bio16, forest 
## min values :     ?,     ?,     ?,     ?,     ?,      0 
## max values :     ?,     ?,     ?,     ?,     ?,      1

model_3 <- ENMevaluate(occ = coordinates(x_trop_all_cleaned),
                                      env=NR_stack_m3,
                                      bg.coords =background_T ,occ.grp = random1$occ.grp ,
                                      bg.grp = bg.grp, RMvalues =1, 
                                      fc=c('L','LQ','LP','LQP'), 
                                      method='user', categoricals = 'forest',
                                      algorithm = 'maxent.jar',clamp = TRUE,rasterPreds = TRUE,
                                      parallel = FALSE,progbar = TRUE, updateProgress = TRUE)

## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |======================================================================| 100%

Model 3 results

#This will be a table with AICc, and corresponding AUCs for each model
model_3@results

##   settings features rm train.AUC avg.test.AUC var.test.AUC avg.diff.AUC
## 1      L_1        L  1    0.6456    0.6296783   0.13383009   0.06127186
## 2     LQ_1       LQ  1    0.6617    0.6263107   0.13678796   0.07355277
## 3     LP_1       LP  1    0.6722    0.6496228   0.08908823   0.05524363
## 4    LQP_1      LQP  1    0.6858    0.6538445   0.10347159   0.06328535
##   var.diff.AUC avg.test.orMTP var.test.orMTP avg.test.or10pct var.test.or10pct
## 1   0.07770363     0.01428571   0.0009070295        0.1148352       0.01718425
## 2   0.09009733     0.01428571   0.0020408163        0.1368132       0.01754653
## 3   0.05480550     0.01428571   0.0009070295        0.1076923       0.01044694
## 4   0.07082120     0.01428571   0.0020408163        0.1368132       0.01868031
##       AICc delta.AICc        w.AIC parameters
## 1 3135.317   23.88832 6.480276e-06          5
## 2 3124.806   13.37708 1.241882e-03          8
## 3 3124.660   13.23165 1.335550e-03         12
## 4 3111.429    0.00000 9.974161e-01         12

#Now retaining the model with the lowest AICc
aic.opt3 <- model_3@models[[which(model_3@results$delta.AICc==0)]]
aic.opt3@results

##                                                                                         [,1]
## X.Training.samples                                                                  139.0000
## Regularized.training.gain                                                             0.1670
## Unregularized.training.gain                                                           0.2329
## Iterations                                                                          500.0000
## Training.AUC                                                                          0.6858
## X.Background.points                                                                2000.0000
## bio06.contribution                                                                    6.7752
## bio10.contribution                                                                    9.3042
## bio14.contribution                                                                   37.1510
## bio15.contribution                                                                   23.5890
## bio16.contribution                                                                   20.9991
## forest.contribution                                                                   2.1816
## bio06.permutation.importance                                                         19.9052
## bio10.permutation.importance                                                          8.4572
## bio14.permutation.importance                                                         51.4989
## bio15.permutation.importance                                                         13.3732
## bio16.permutation.importance                                                          6.7656
## forest.permutation.importance                                                         0.0000
## Entropy                                                                               7.4331
## Prevalence..average.probability.of.presence.over.background.sites.                    0.5225
## Fixed.cumulative.value.1.cumulative.threshold                                         1.0000
## Fixed.cumulative.value.1.Cloglog.threshold                                            0.2552
## Fixed.cumulative.value.1.area                                                         0.9685
## Fixed.cumulative.value.1.training.omission                                            0.0000
## Fixed.cumulative.value.5.cumulative.threshold                                         5.0000
## Fixed.cumulative.value.5.Cloglog.threshold                                            0.3191
## Fixed.cumulative.value.5.area                                                         0.8695
## Fixed.cumulative.value.5.training.omission                                            0.0576
## Fixed.cumulative.value.10.cumulative.threshold                                       10.0000
## Fixed.cumulative.value.10.Cloglog.threshold                                           0.3700
## Fixed.cumulative.value.10.area                                                        0.7685
## Fixed.cumulative.value.10.training.omission                                           0.0863
## Minimum.training.presence.cumulative.threshold                                        1.4672
## Minimum.training.presence.Cloglog.threshold                                           0.2652
## Minimum.training.presence.area                                                        0.9550
## Minimum.training.presence.training.omission                                           0.0000
## X10.percentile.training.presence.cumulative.threshold                                12.7232
## X10.percentile.training.presence.Cloglog.threshold                                    0.3927
## X10.percentile.training.presence.area                                                 0.7210
## X10.percentile.training.presence.training.omission                                    0.0935
## Equal.training.sensitivity.and.specificity.cumulative.threshold                      39.0615
## Equal.training.sensitivity.and.specificity.Cloglog.threshold                          0.5761
## Equal.training.sensitivity.and.specificity.area                                       0.3815
## Equal.training.sensitivity.and.specificity.training.omission                          0.3813
## Maximum.training.sensitivity.plus.specificity.cumulative.threshold                   29.4992
## Maximum.training.sensitivity.plus.specificity.Cloglog.threshold                       0.5112
## Maximum.training.sensitivity.plus.specificity.area                                    0.4840
## Maximum.training.sensitivity.plus.specificity.training.omission                       0.2374
## Balance.training.omission..predicted.area.and.threshold.value.cumulative.threshold    1.4672
## Balance.training.omission..predicted.area.and.threshold.value.Cloglog.threshold       0.2652
## Balance.training.omission..predicted.area.and.threshold.value.area                    0.9550
## Balance.training.omission..predicted.area.and.threshold.value.training.omission       0.0000
## Equate.entropy.of.thresholded.and.original.distributions.cumulative.threshold         6.1184
## Equate.entropy.of.thresholded.and.original.distributions.Cloglog.threshold            0.3296
## Equate.entropy.of.thresholded.and.original.distributions.area                         0.8455
## Equate.entropy.of.thresholded.and.original.distributions.training.omission            0.0647

#Here we are looking at the importance of each variable
var.importance(aic.opt3)

##   variable percent.contribution permutation.importance
## 1    bio06               6.7752                19.9052
## 2    bio10               9.3042                 8.4572
## 3    bio14              37.1510                51.4989
## 4    bio15              23.5890                13.3732
## 5    bio16              20.9991                 6.7656
## 6   forest               2.1816                 0.0000

#These are the coefficients for each variable in the model. 
parse_lambdas(aic.opt3)

## 
## Features with non-zero weights
## 
##      feature   lambda       min       max      type
##        bio06  -8.8366 9.772e+00      22.8    linear
##        bio16   1.0076 3.940e+02    2405.0    linear
##      bio15^2   9.0405 1.494e+03   25711.2 quadratic
##      bio16^2   0.8726 1.552e+05 5784025.0 quadratic
##  bio06*bio10   7.3732 2.207e+02     664.0   product
##  bio06*bio14  -4.2611 0.000e+00    2396.2   product
##  bio06*bio15   3.6533 7.402e+02    2754.5   product
##  bio10*bio14   3.9265 0.000e+00    3148.8   product
##  bio10*bio15 -12.8895 1.014e+03    4554.9   product
##  bio14*bio15   8.0329 0.000e+00    5778.0   product
##  bio14*bio16  -5.6401 0.000e+00  142346.0   product
##  bio15*bio16   0.3674 2.352e+04  323752.8   product
## 
## 
## Features with zero weights
## 
##        feature lambda   min    max        type
##  (forest==0.0)      0  0.00   1.00 categorical
##          bio10      0 19.99  32.03      linear
##          bio14      0  0.00 120.00      linear
##          bio15      0 38.66 160.35      linear

#Lastly, the response curves for the best model
response(aic.opt3)

Model 3: predictions

x_trop_all_cleaned <- read.csv('data/xenopus_raster_values.csv')
coordinates(x_trop_all_cleaned) <- ~ decimalLongitude + decimalLatitude
crs(x_trop_all_cleaned) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"

#Predicting relative probability over the accessible area
NR_pred_3 <- (predict(aic.opt3,NR_stack_m3))
plot(NR_pred_3)

#Predicting over Florida
FL_pred_3 <- (predict(aic.opt3, FL_stack_m3))
plot(FL_pred_3)

#Setting the threshold for presence absence to 5%
m3_thresh <- data.frame(raster::extract(NR_pred_3, coordinates(x_trop_all_cleaned)))
m3_thresh <- na.omit(m3_thresh)
colnames(m3_thresh) <- 'probability'
quantile(m3_thresh$probability, 0.05)

##        5% 
## 0.3139689

FL_pred_3_thresholded <- FL_pred_3

FL_pred_3_thresholded[FL_pred_3_thresholded >= aic.opt3@results[46] ] = 3

FL_pred_3_thresholded[FL_pred_3_thresholded >= aic.opt3@results[38] & FL_pred_3_thresholded < aic.opt3@results[46] ] = 2

FL_pred_3_thresholded[FL_pred_3_thresholded >= quantile(m3_thresh$probability, 0.05) & FL_pred_3_thresholded < aic.opt3@results[38] ] = 1

FL_pred_3_thresholded[FL_pred_3_thresholded < quantile(m3_thresh$probability, 0.05) ] = 0

#Plotting the thresholded model
plot(FL_pred_3_thresholded)

Model 4

# The actual model
NR_stack_m4

## class      : RasterStack 
## dimensions : 310, 724, 224440, 7  (nrow, ncol, ncell, nlayers)
## resolution : 0.04166667, 0.04166667  (x, y)
## extent     : -17.54167, 12.625, 2.291667, 15.20833  (xmin, xmax, ymin, ymax)
## crs        : +proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0 
## names      : bio2, bio4, bio06, bio11, bio15, bio16, bio17

model_4 <- ENMevaluate(occ = coordinates(x_trop_all_cleaned),
                                      env=NR_stack_m4,
                                      bg.coords =background_T ,occ.grp = random1$occ.grp ,
                                      bg.grp = bg.grp, RMvalues =1, 
                                      fc=c('L','LQ','LP','LQP'), 
                                      method='user',
                                      algorithm = 'maxent.jar',clamp = TRUE,rasterPreds = TRUE,
                                      parallel = FALSE,progbar = TRUE, updateProgress = TRUE)

## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |======================================================================| 100%

Model 4 results

#This will be a table with AICc, and corresponding AUCs for each model
model_4@results

##   settings features rm train.AUC avg.test.AUC var.test.AUC avg.diff.AUC
## 1      L_1        L  1    0.6972    0.6862505   0.06726086   0.04257800
## 2     LQ_1       LQ  1    0.7214    0.7001926   0.06874197   0.05004751
## 3     LP_1       LP  1    0.7503    0.7213130   0.04570191   0.04606394
## 4    LQP_1      LQP  1    0.7467    0.7114385   0.06234145   0.05383243
##   var.diff.AUC avg.test.orMTP var.test.orMTP avg.test.or10pct var.test.or10pct
## 1   0.04100704    0.000000000   0.0000000000        0.1214286      0.013662132
## 2   0.04852184    0.014285714   0.0020408163        0.1148352      0.011515316
## 3   0.03515968    0.007142857   0.0005102041        0.1076923      0.008179366
## 4   0.05011068    0.007142857   0.0005102041        0.1076923      0.012714514
##       AICc delta.AICc        w.AIC parameters
## 1 3099.291  27.363396 9.942623e-07          6
## 2 3088.541  16.614312 2.146026e-04         10
## 3 3071.927   0.000000 8.697350e-01         17
## 4 3075.728   3.800548 1.300494e-01         19

#Now retaining the model with the lowest AICc
aic.opt4 <- model_4@models[[which(model_4@results$delta.AICc==0)]]
aic.opt4@results

##                                                                                         [,1]
## X.Training.samples                                                                  139.0000
## Regularized.training.gain                                                             0.4158
## Unregularized.training.gain                                                           0.4833
## Iterations                                                                          500.0000
## Training.AUC                                                                          0.7503
## X.Background.points                                                                2000.0000
## bio06.contribution                                                                    6.0628
## bio11.contribution                                                                   10.9602
## bio15.contribution                                                                    9.4240
## bio16.contribution                                                                    8.8258
## bio17.contribution                                                                   17.1500
## bio2.contribution                                                                    20.1481
## bio4.contribution                                                                    27.4291
## bio06.permutation.importance                                                         12.5048
## bio11.permutation.importance                                                          4.7848
## bio15.permutation.importance                                                         11.6010
## bio16.permutation.importance                                                          6.2969
## bio17.permutation.importance                                                         34.1247
## bio2.permutation.importance                                                          18.1242
## bio4.permutation.importance                                                          12.5637
## Entropy                                                                               7.1865
## Prevalence..average.probability.of.presence.over.background.sites.                    0.4000
## Fixed.cumulative.value.1.cumulative.threshold                                         1.0000
## Fixed.cumulative.value.1.Cloglog.threshold                                            0.1372
## Fixed.cumulative.value.1.area                                                         0.9305
## Fixed.cumulative.value.1.training.omission                                            0.0144
## Fixed.cumulative.value.5.cumulative.threshold                                         5.0000
## Fixed.cumulative.value.5.Cloglog.threshold                                            0.2390
## Fixed.cumulative.value.5.area                                                         0.8050
## Fixed.cumulative.value.5.training.omission                                            0.0504
## Fixed.cumulative.value.10.cumulative.threshold                                       10.0000
## Fixed.cumulative.value.10.Cloglog.threshold                                           0.2950
## Fixed.cumulative.value.10.area                                                        0.6995
## Fixed.cumulative.value.10.training.omission                                           0.0935
## Minimum.training.presence.cumulative.threshold                                        0.0718
## Minimum.training.presence.Cloglog.threshold                                           0.0499
## Minimum.training.presence.area                                                        0.9850
## Minimum.training.presence.training.omission                                           0.0000
## X10.percentile.training.presence.cumulative.threshold                                13.6050
## X10.percentile.training.presence.Cloglog.threshold                                    0.3195
## X10.percentile.training.presence.area                                                 0.6350
## X10.percentile.training.presence.training.omission                                    0.0935
## Equal.training.sensitivity.and.specificity.cumulative.threshold                      37.6878
## Equal.training.sensitivity.and.specificity.Cloglog.threshold                          0.4504
## Equal.training.sensitivity.and.specificity.area                                       0.3095
## Equal.training.sensitivity.and.specificity.training.omission                          0.3094
## Maximum.training.sensitivity.plus.specificity.cumulative.threshold                   34.8433
## Maximum.training.sensitivity.plus.specificity.Cloglog.threshold                       0.4379
## Maximum.training.sensitivity.plus.specificity.area                                    0.3420
## Maximum.training.sensitivity.plus.specificity.training.omission                       0.2302
## Balance.training.omission..predicted.area.and.threshold.value.cumulative.threshold    0.0718
## Balance.training.omission..predicted.area.and.threshold.value.Cloglog.threshold       0.0499
## Balance.training.omission..predicted.area.and.threshold.value.area                    0.9850
## Balance.training.omission..predicted.area.and.threshold.value.training.omission       0.0000
## Equate.entropy.of.thresholded.and.original.distributions.cumulative.threshold        12.1530
## Equate.entropy.of.thresholded.and.original.distributions.Cloglog.threshold            0.3100
## Equate.entropy.of.thresholded.and.original.distributions.area                         0.6605
## Equate.entropy.of.thresholded.and.original.distributions.training.omission            0.0935

#Here we are looking at the importance of each variable
var.importance(aic.opt4)

##   variable percent.contribution permutation.importance
## 1    bio06               6.0628                12.5048
## 2    bio11              10.9602                 4.7848
## 3    bio15               9.4240                11.6010
## 4    bio16               8.8258                 6.2969
## 5    bio17              17.1500                34.1247
## 6     bio2              20.1481                18.1242
## 7     bio4              27.4291                12.5637

#These are the coefficients for each variable in the model. 
parse_lambdas(aic.opt4)

## 
## Features with non-zero weights
## 
##      feature    lambda       min       max    type
##        bio06   0.25602     9.772     22.80  linear
##        bio11  -2.69808    16.309     27.91  linear
##         bio2   8.21237     6.128     16.79  linear
##  bio06*bio16   1.44891  6509.584  52527.38 product
##  bio06*bio17   0.18531     0.000   8945.66 product
##   bio06*bio2  -0.45070   107.015    241.55 product
##   bio06*bio4   6.76774   793.004   4067.63 product
##   bio11*bio4  -0.07104  1407.320   6088.96 product
##  bio15*bio16   6.36461 23521.858 323752.78 product
##  bio15*bio17   6.95965     0.000  23168.02 product
##   bio15*bio2   3.24707   296.948   2326.00 product
##  bio16*bio17  -4.24404     0.000 570766.00 product
##   bio16*bio2  -0.85607  3348.035  26071.86 product
##   bio16*bio4  -6.64701 42338.120 433288.82 product
##   bio17*bio2  -2.37030     0.000   4132.99 product
##   bio17*bio4   5.11139     0.000  51811.45 product
##    bio2*bio4 -11.36027   522.607   3456.82 product
## 
## 
## Features with zero weights
## 
##  feature lambda    min    max   type
##    bio15      0  38.66  160.3 linear
##    bio16      0 394.00 2405.0 linear
##    bio17      0   0.00  448.0 linear
##     bio4      0  57.24  252.4 linear

#Lastly, the response curves for the best model
response(aic.opt4)

Model 4: predictions

x_trop_all_cleaned <- read.csv('data/xenopus_raster_values.csv')
coordinates(x_trop_all_cleaned) <- ~ decimalLongitude + decimalLatitude
crs(x_trop_all_cleaned) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"

#Predicting relative probability over the accessible area
NR_pred_4 <- (predict(aic.opt4,NR_stack_m4))
plot(NR_pred_4)

#Predicting over Florida
FL_pred_4 <- (predict(aic.opt4, FL_stack_m4))
plot(FL_pred_4)

#Setting the threshold for presence absence to 5%
m4_thresh <- data.frame(raster::extract(NR_pred_4, coordinates(x_trop_all_cleaned)))
m4_thresh <- na.omit(m4_thresh)
colnames(m4_thresh) <- 'probability'
quantile(m4_thresh$probability, 0.05)

##        5% 
## 0.2584182

FL_pred_4_thresholded <- FL_pred_4

FL_pred_4_thresholded[FL_pred_4_thresholded >= aic.opt4@results[48] ] = 3
FL_pred_4_thresholded[FL_pred_4_thresholded >= aic.opt4@results[40] & FL_pred_4_thresholded < aic.opt4@results[48] ] = 2
FL_pred_4_thresholded[FL_pred_4_thresholded >= quantile(m4_thresh$probability, 0.05) & FL_pred_4_thresholded < aic.opt4@results[40] ] = 1
FL_pred_4_thresholded[FL_pred_4_thresholded < quantile(m4_thresh$probability, 0.05) ] = 0

#Plotting the thresholded model
plot(FL_pred_4_thresholded)

Proposed ENM analysis

Colin

April 20, 2020