af_anurans <- read.csv('data/af_anurans.csv')
x_trop_all_cleaned <- read.csv('data/xenopus_raster_values.csv')
Note on accessible area: The IUCN puts some uncertainty on the range. Therefore, I have elected to define the accessible area as being a 200 km radius around all occurrence points.
NR_stack_m1 <- stack(
raster('M_variables/Set1/bio07.asc'),
raster('M_variables/Set1/bio11.asc'),
raster('M_variables/Set1/bio15.asc'),
raster('M_variables/Set1/bio16.asc'),
raster('M_variables/Set1/bio17.asc')
)
FL_stack_m1 <- stack(
raster('G_variables/Set1/bio07.asc'),
raster('G_variables/Set1/bio11.asc'),
raster('G_variables/Set1/bio15.asc'),
raster('G_variables/Set1/bio16.asc'),
raster('G_variables/Set1/bio17.asc')
)
NR_stack_m2 <- stack(
raster('M_variables/Set2/bio05.asc'),
raster('M_variables/Set2/bio06.asc'),
raster('M_variables/Set2/bio11.asc'),
raster('M_variables/Set2/bio15.asc'),
raster('M_variables/Set2/bio16.asc'),
raster('M_variables/Set2/bio17.asc')
)
FL_stack_m2 <- stack(
raster('G_variables/Set2/bio05.asc'),
raster('G_variables/Set2/bio06.asc'),
raster('G_variables/Set2/bio11.asc'),
raster('G_variables/Set2/bio15.asc'),
raster('G_variables/Set2/bio16.asc'),
raster('G_variables/Set2/bio17.asc')
)
NR_stack_m3 <- stack(
raster('M_variables/Set3/bio06.asc'),
raster('M_variables/Set3/bio10.asc'),
raster('M_variables/Set3/bio14.asc'),
raster('M_variables/Set3/bio15.asc'),
raster('M_variables/Set3/bio16.asc'),
raster('M_variables/Set3/forest.tif')
)
FL_stack_m3 <- stack(
raster('G_variables/Set3/bio06.asc'),
raster('G_variables/Set3/bio10.asc'),
raster('G_variables/Set3/bio14.asc'),
raster('G_variables/Set3/bio15.asc'),
raster('G_variables/Set3/bio16.asc'),
raster('G_variables/Set3/forest.tif')
)
NR_stack_m4 <- stack(
raster('M_variables/Set4/bio2.asc'),
raster('M_variables/Set4/bio4.asc'),
raster('M_variables/Set4/bio06.asc'),
raster('M_variables/Set4/bio11.asc'),
raster('M_variables/Set4/bio15.asc'),
raster('M_variables/Set4/bio16.asc'),
raster('M_variables/Set4/bio17.asc')
)
FL_stack_m4 <- stack(
raster('G_variables/Set4/bio2.asc'),
raster('G_variables/Set4/bio4.asc'),
raster('G_variables/Set4/bio06.asc'),
raster('G_variables/Set4/bio11.asc'),
raster('G_variables/Set4/bio15.asc'),
raster('G_variables/Set4/bio16.asc'),
raster('G_variables/Set4/bio17.asc')
)
NR_stack_m5 <- stack(
raster('M_variables/Set5/bio07.asc'),
raster('M_variables/Set5/bio11.asc'),
raster('M_variables/Set5/bio15.asc'),
raster('M_variables/Set5/bio16.asc'),
raster('M_variables/Set5/bio17.asc'),
raster('M_variables/Set5/forest.tif')
)
FL_stack_m5 <- stack(
raster('G_variables/Set5/bio07.asc'),
raster('G_variables/Set5/bio11.asc'),
raster('G_variables/Set5/bio15.asc'),
raster('G_variables/Set5/bio16.asc'),
raster('G_variables/Set5/bio17.asc'),
raster('G_variables/Set5/forest.tif')
)
crs(NR_stack_m1) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
crs(FL_stack_m1) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
crs(NR_stack_m2) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
crs(FL_stack_m2) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
crs(NR_stack_m3) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
crs(FL_stack_m3) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
crs(NR_stack_m4) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
crs(FL_stack_m4) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
crs(NR_stack_m5) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
crs(FL_stack_m5) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
####Red points represent points for the surrogate species bias layer. Blue points represent X. tropicalis presence points
# Creating bias layer
af_anurans <- data.frame(af_anurans)
anuran_bias_layer <- biaslayer(occs_df =af_anurans,
longitude = 'decimalLongitude', latitude = 'decimalLatitude',
raster_mold = NR_stack_m1[[1]])
anuran_bias_layer<- crop(anuran_bias_layer, NR_stack_m1[[1]])
anuran_bias_layer<- mask(anuran_bias_layer, NR_stack_m1[[1]])
coordinates(af_anurans) <- ~ decimalLongitude + decimalLatitude
plot(anuran_bias_layer)
points(af_anurans, pch=23, bg = 'orange', cex = 0.75, col='red')
background_T<-sampleRast(anuran_bias_layer,2000,adjArea=TRUE, replace=FALSE,prob=TRUE) #sampling from the surrogate species bias layer to create a set of background data points
bg.grp <- rep(0, 2000) # generating a blank vector with a length of 2000 which will serve as the actual background points
plot(anuran_bias_layer)
points(background_T, pch=23, bg = 'black', cex = 0.6, col='red')
x_trop_all_cleaned <- data.frame(x_trop_all_cleaned)
coordinates(x_trop_all_cleaned) <- ~ decimalLongitude + decimalLatitude
crs(x_trop_all_cleaned) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
random1 <- get.randomkfold(x_trop_all_cleaned, background_T, k=10)
plot(NR_stack_m1[[1]], legend=FALSE)
points(x_trop_all_cleaned, pch=21, bg=random1$occ.grp)
# The actual model
NR_stack_m1
## class : RasterStack
## dimensions : 310, 724, 224440, 5 (nrow, ncol, ncell, nlayers)
## resolution : 0.04166667, 0.04166667 (x, y)
## extent : -17.54167, 12.625, 2.291667, 15.20833 (xmin, xmax, ymin, ymax)
## crs : +proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0
## names : bio07, bio11, bio15, bio16, bio17
model_1 <- ENMevaluate(occ = coordinates(x_trop_all_cleaned),
env=NR_stack_m1,
bg.coords =background_T ,occ.grp = random1$occ.grp ,
bg.grp = bg.grp, RMvalues =1,
fc=c('L','LQ','LP','LQP'),
method='user',
algorithm = 'maxent.jar',clamp = TRUE,rasterPreds = TRUE,
parallel = FALSE,progbar = TRUE, updateProgress = TRUE)
##
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
#This will be a table with AICc, and corresponding AUCs for each model
model_1@results
## settings features rm train.AUC avg.test.AUC var.test.AUC avg.diff.AUC
## 1 L_1 L 1 0.6795 0.6640030 0.12119209 0.05628451
## 2 LQ_1 LQ 1 0.7017 0.6728146 0.11443877 0.06629353
## 3 LP_1 LP 1 0.7217 0.6999984 0.05593123 0.04416523
## 4 LQP_1 LQP 1 0.7232 0.6968577 0.07463082 0.05292211
## var.diff.AUC avg.test.orMTP var.test.orMTP avg.test.or10pct var.test.or10pct
## 1 0.07797906 0.007142857 0.0005102041 0.1219780 0.013604100
## 2 0.07758412 0.014835165 0.0009798199 0.1153846 0.010512686
## 3 0.03826417 0.007142857 0.0005102041 0.1082418 0.009453032
## 4 0.05270947 0.014285714 0.0020408163 0.1010989 0.012815146
## AICc delta.AICc w.AIC parameters
## 1 3154.435 48.970574 1.621433e-11 4
## 2 3124.751 19.285718 4.527762e-05 9
## 3 3107.139 1.673881 3.021658e-01 9
## 4 3105.465 0.000000 6.977889e-01 12
#Now retaining the model with the lowest AICc
aic.opt1 <- model_1@models[[which(model_1@results$delta.AICc==0)]]
aic.opt1@results
## [,1]
## X.Training.samples 139.0000
## Regularized.training.gain 0.3116
## Unregularized.training.gain 0.3660
## Iterations 500.0000
## Training.AUC 0.7232
## X.Background.points 2000.0000
## bio07.contribution 34.3752
## bio11.contribution 1.0160
## bio15.contribution 10.0825
## bio16.contribution 10.2449
## bio17.contribution 44.2815
## bio07.permutation.importance 18.4485
## bio11.permutation.importance 0.1033
## bio15.permutation.importance 15.9146
## bio16.permutation.importance 7.1395
## bio17.permutation.importance 58.3941
## Entropy 7.2873
## Prevalence..average.probability.of.presence.over.background.sites. 0.4444
## Fixed.cumulative.value.1.cumulative.threshold 1.0000
## Fixed.cumulative.value.1.Cloglog.threshold 0.2270
## Fixed.cumulative.value.1.area 0.9700
## Fixed.cumulative.value.1.training.omission 0.0072
## Fixed.cumulative.value.5.cumulative.threshold 5.0000
## Fixed.cumulative.value.5.Cloglog.threshold 0.2556
## Fixed.cumulative.value.5.area 0.8650
## Fixed.cumulative.value.5.training.omission 0.0647
## Fixed.cumulative.value.10.cumulative.threshold 10.0000
## Fixed.cumulative.value.10.Cloglog.threshold 0.2898
## Fixed.cumulative.value.10.area 0.7490
## Fixed.cumulative.value.10.training.omission 0.0863
## Minimum.training.presence.cumulative.threshold 0.8095
## Minimum.training.presence.Cloglog.threshold 0.2219
## Minimum.training.presence.area 0.9750
## Minimum.training.presence.training.omission 0.0000
## X10.percentile.training.presence.cumulative.threshold 11.0883
## X10.percentile.training.presence.Cloglog.threshold 0.2990
## X10.percentile.training.presence.area 0.7260
## X10.percentile.training.presence.training.omission 0.0935
## Equal.training.sensitivity.and.specificity.cumulative.threshold 37.8350
## Equal.training.sensitivity.and.specificity.Cloglog.threshold 0.4917
## Equal.training.sensitivity.and.specificity.area 0.3310
## Equal.training.sensitivity.and.specificity.training.omission 0.3309
## Maximum.training.sensitivity.plus.specificity.cumulative.threshold 38.1128
## Maximum.training.sensitivity.plus.specificity.Cloglog.threshold 0.4944
## Maximum.training.sensitivity.plus.specificity.area 0.3280
## Maximum.training.sensitivity.plus.specificity.training.omission 0.3309
## Balance.training.omission..predicted.area.and.threshold.value.cumulative.threshold 0.8095
## Balance.training.omission..predicted.area.and.threshold.value.Cloglog.threshold 0.2219
## Balance.training.omission..predicted.area.and.threshold.value.area 0.9750
## Balance.training.omission..predicted.area.and.threshold.value.training.omission 0.0000
## Equate.entropy.of.thresholded.and.original.distributions.cumulative.threshold 10.8884
## Equate.entropy.of.thresholded.and.original.distributions.Cloglog.threshold 0.2969
## Equate.entropy.of.thresholded.and.original.distributions.area 0.7305
## Equate.entropy.of.thresholded.and.original.distributions.training.omission 0.0935
#Here we are looking at the importance of each variable
var.importance(aic.opt1)
## variable percent.contribution permutation.importance
## 1 bio07 34.3752 18.4485
## 2 bio11 1.0160 0.1033
## 3 bio15 10.0825 15.9146
## 4 bio16 10.2449 7.1395
## 5 bio17 44.2815 58.3941
#These are the coefficients for each variable in the model.
parse_lambdas(aic.opt1)
##
## Features with non-zero weights
##
## feature lambda min max type
## bio07 5.66846 8.988 27.32 linear
## bio11^2 -0.02592 265.973 778.73 quadratic
## bio15^2 4.55393 1494.339 25711.19 quadratic
## bio17^2 -4.24517 0.000 200704.00 quadratic
## bio07*bio11 -0.23140 176.373 630.22 product
## bio07*bio15 -5.28997 431.254 3374.55 product
## bio07*bio16 -5.94034 4588.695 44963.61 product
## bio07*bio17 6.62809 0.000 4997.89 product
## bio11*bio17 -1.42323 0.000 10668.97 product
## bio15*bio16 7.95963 23521.858 323752.78 product
## bio15*bio17 7.76101 0.000 23168.02 product
## bio16*bio17 -4.66997 0.000 570766.00 product
##
##
## Features with zero weights
##
## feature lambda min max type
## bio11 0 16.31 27.91 linear
## bio15 0 38.66 160.35 linear
## bio16 0 394.00 2405.00 linear
## bio17 0 0.00 448.00 linear
#Lastly, the response curves for the best model
response(aic.opt1)
x_trop_all_cleaned <- read.csv('data/xenopus_raster_values.csv')
coordinates(x_trop_all_cleaned) <- ~ decimalLongitude + decimalLatitude
crs(x_trop_all_cleaned) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
#Predicting relative probability over the accessible area
NR_pred_1 <- (predict(aic.opt1,NR_stack_m1))
plot(NR_pred_1)
#Predicting over Florida
FL_pred_1 <- (predict(aic.opt1, FL_stack_m1))
plot(FL_pred_1)
#Setting the threshold for presence absence to 5%
m1_thresh <- data.frame(raster::extract(NR_pred_1, coordinates(x_trop_all_cleaned)))
m1_thresh <- na.omit(m1_thresh)
colnames(m1_thresh) <- 'probability'
quantile(m1_thresh$probability, 0.05)
## 5%
## 0.2532359
FL_pred_1_thresholded <- FL_pred_1
FL_pred_1_thresholded[FL_pred_1_thresholded >= aic.opt1@results[44] ] = 3
FL_pred_1_thresholded[FL_pred_1_thresholded >= aic.opt1@results[36] & FL_pred_1_thresholded < aic.opt1@results[44] ] = 2
FL_pred_1_thresholded[FL_pred_1_thresholded >= quantile(m1_thresh$probability, 0.05) & FL_pred_1_thresholded < aic.opt1@results[36] ] = 1
FL_pred_1_thresholded[FL_pred_1_thresholded < quantile(m1_thresh$probability, 0.05) ] = 0
#Plotting the thresholded model
plot(FL_pred_1_thresholded)
# The actual model
NR_stack_m2
## class : RasterStack
## dimensions : 310, 724, 224440, 6 (nrow, ncol, ncell, nlayers)
## resolution : 0.04166667, 0.04166667 (x, y)
## extent : -17.54167, 12.625, 2.291667, 15.20833 (xmin, xmax, ymin, ymax)
## crs : +proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0
## names : bio05, bio06, bio11, bio15, bio16, bio17
model_2 <- ENMevaluate(occ = coordinates(x_trop_all_cleaned),
env=NR_stack_m2,
bg.coords =background_T ,occ.grp = random1$occ.grp ,
bg.grp = bg.grp, RMvalues =1,
fc=c('L','LQ','LP','LQP'),
method='user',
algorithm = 'maxent.jar',clamp = TRUE,rasterPreds = TRUE,
parallel = FALSE,progbar = TRUE, updateProgress = TRUE)
##
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
#This will be a table with AICc, and corresponding AUCs for each model
model_2@results
## settings features rm train.AUC avg.test.AUC var.test.AUC avg.diff.AUC
## 1 L_1 L 1 0.6858 0.6693679 0.10437585 0.05336123
## 2 LQ_1 LQ 1 0.6991 0.6690569 0.10500705 0.06356853
## 3 LP_1 LP 1 0.7124 0.6877365 0.06151944 0.04856117
## 4 LQP_1 LQP 1 0.7187 0.6888835 0.07893354 0.05535377
## var.diff.AUC avg.test.orMTP var.test.orMTP avg.test.or10pct var.test.or10pct
## 1 0.06954694 0.007142857 0.0005102041 0.1005495 0.006997612
## 2 0.07442220 0.014285714 0.0009070295 0.1153846 0.009378899
## 3 0.04085400 0.014285714 0.0009070295 0.1082418 0.008319245
## 4 0.05891174 0.007142857 0.0005102041 0.1153846 0.012780260
## AICc delta.AICc w.AIC parameters
## 1 3149.165 40.193184 1.522885e-09 5
## 2 3130.470 21.498968 1.746081e-05 10
## 3 3111.921 2.949689 1.862041e-01 10
## 4 3108.972 0.000000 8.137784e-01 13
#Now retaining the model with the lowest AICc
aic.opt2 <- model_2@models[[which(model_2@results$delta.AICc==0)]]
aic.opt2@results
## [,1]
## X.Training.samples 139.0000
## Regularized.training.gain 0.2840
## Unregularized.training.gain 0.3453
## Iterations 480.0000
## Training.AUC 0.7187
## X.Background.points 2000.0000
## bio05.contribution 7.2353
## bio06.contribution 5.3422
## bio11.contribution 2.4328
## bio15.contribution 35.7333
## bio16.contribution 8.1681
## bio17.contribution 41.0883
## bio05.permutation.importance 10.7961
## bio06.permutation.importance 10.1055
## bio11.permutation.importance 0.0000
## bio15.permutation.importance 15.5904
## bio16.permutation.importance 6.1583
## bio17.permutation.importance 57.3498
## Entropy 7.3139
## Prevalence..average.probability.of.presence.over.background.sites. 0.4581
## Fixed.cumulative.value.1.cumulative.threshold 1.0000
## Fixed.cumulative.value.1.Cloglog.threshold 0.2046
## Fixed.cumulative.value.1.area 0.9655
## Fixed.cumulative.value.1.training.omission 0.0144
## Fixed.cumulative.value.5.cumulative.threshold 5.0000
## Fixed.cumulative.value.5.Cloglog.threshold 0.2543
## Fixed.cumulative.value.5.area 0.8500
## Fixed.cumulative.value.5.training.omission 0.0432
## Fixed.cumulative.value.10.cumulative.threshold 10.0000
## Fixed.cumulative.value.10.Cloglog.threshold 0.3143
## Fixed.cumulative.value.10.area 0.7385
## Fixed.cumulative.value.10.training.omission 0.0791
## Minimum.training.presence.cumulative.threshold 0.3663
## Minimum.training.presence.Cloglog.threshold 0.1909
## Minimum.training.presence.area 0.9870
## Minimum.training.presence.training.omission 0.0000
## X10.percentile.training.presence.cumulative.threshold 11.8489
## X10.percentile.training.presence.Cloglog.threshold 0.3272
## X10.percentile.training.presence.area 0.7025
## X10.percentile.training.presence.training.omission 0.0935
## Equal.training.sensitivity.and.specificity.cumulative.threshold 37.7324
## Equal.training.sensitivity.and.specificity.Cloglog.threshold 0.5119
## Equal.training.sensitivity.and.specificity.area 0.3440
## Equal.training.sensitivity.and.specificity.training.omission 0.3453
## Maximum.training.sensitivity.plus.specificity.cumulative.threshold 35.1482
## Maximum.training.sensitivity.plus.specificity.Cloglog.threshold 0.4959
## Maximum.training.sensitivity.plus.specificity.area 0.3715
## Maximum.training.sensitivity.plus.specificity.training.omission 0.2806
## Balance.training.omission..predicted.area.and.threshold.value.cumulative.threshold 0.3663
## Balance.training.omission..predicted.area.and.threshold.value.Cloglog.threshold 0.1909
## Balance.training.omission..predicted.area.and.threshold.value.area 0.9870
## Balance.training.omission..predicted.area.and.threshold.value.training.omission 0.0000
## Equate.entropy.of.thresholded.and.original.distributions.cumulative.threshold 9.4154
## Equate.entropy.of.thresholded.and.original.distributions.Cloglog.threshold 0.3095
## Equate.entropy.of.thresholded.and.original.distributions.area 0.7505
## Equate.entropy.of.thresholded.and.original.distributions.training.omission 0.0791
#Here we are looking at the importance of each variable
var.importance(aic.opt2)
## variable percent.contribution permutation.importance
## 1 bio05 7.2353 10.7961
## 2 bio06 5.3422 10.1055
## 3 bio11 2.4328 0.0000
## 4 bio15 35.7333 15.5904
## 5 bio16 8.1681 6.1583
## 6 bio17 41.0883 57.3498
#These are the coefficients for each variable in the model.
parse_lambdas(aic.opt2)
##
## Features with non-zero weights
##
## feature lambda min max type
## bio05 4.0705 2.512e+01 3.969e+01 linear
## bio06 -1.0789 9.772e+00 2.280e+01 linear
## bio15^2 9.8691 1.494e+03 2.571e+04 quadratic
## bio16^2 0.3041 1.552e+05 5.784e+06 quadratic
## bio17^2 -1.1584 0.000e+00 2.007e+05 quadratic
## bio05*bio15 -10.4067 1.203e+03 5.769e+03 product
## bio05*bio16 -1.6088 1.356e+04 9.232e+04 product
## bio05*bio17 4.1287 0.000e+00 1.394e+04 product
## bio06*bio15 0.3095 7.402e+02 2.754e+03 product
## bio06*bio17 -6.7361 0.000e+00 8.946e+03 product
## bio15*bio16 3.9897 2.352e+04 3.238e+05 product
## bio15*bio17 12.4560 0.000e+00 2.317e+04 product
## bio16*bio17 -5.1683 0.000e+00 5.708e+05 product
##
##
## Features with zero weights
##
## feature lambda min max type
## bio11 0 16.31 27.91 linear
## bio15 0 38.66 160.35 linear
## bio16 0 394.00 2405.00 linear
## bio17 0 0.00 448.00 linear
#Lastly, the response curves for the best model
response(aic.opt2)
x_trop_all_cleaned <- read.csv('data/xenopus_raster_values.csv')
coordinates(x_trop_all_cleaned) <- ~ decimalLongitude + decimalLatitude
crs(x_trop_all_cleaned) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
#Predicting relative probability over the accessible area
NR_pred_2 <- (predict(aic.opt2,NR_stack_m2))
plot(NR_pred_2)
#Predicting over Florida
FL_pred_2 <- (predict(aic.opt2, FL_stack_m2))
plot(FL_pred_2)
#Setting the threshold for presence absence to 5%
m2_thresh <- data.frame(raster::extract(NR_pred_2, coordinates(x_trop_all_cleaned)))
m2_thresh <- na.omit(m2_thresh)
colnames(m2_thresh) <- 'probability'
quantile(m2_thresh$probability, 0.05)
## 5%
## 0.2668274
FL_pred_2_thresholded <- FL_pred_2
FL_pred_2_thresholded[FL_pred_2_thresholded >= aic.opt2@results[46] ] = 3
FL_pred_2_thresholded[FL_pred_2_thresholded >= aic.opt2@results[38] & FL_pred_2_thresholded < aic.opt2@results[46] ] = 2
FL_pred_2_thresholded[FL_pred_2_thresholded >= quantile(m2_thresh$probability, 0.05) & FL_pred_2_thresholded < aic.opt2@results[38] ] = 1
FL_pred_2_thresholded[FL_pred_2_thresholded < quantile(m2_thresh$probability, 0.05) ] = 0
#Plotting the thresholded model
plot(FL_pred_2_thresholded)
# The actual model
NR_stack_m3
## class : RasterStack
## dimensions : 310, 724, 224440, 6 (nrow, ncol, ncell, nlayers)
## resolution : 0.04166667, 0.04166667 (x, y)
## extent : -17.54167, 12.625, 2.291667, 15.20833 (xmin, xmax, ymin, ymax)
## crs : +proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0
## names : bio06, bio10, bio14, bio15, bio16, forest
## min values : ?, ?, ?, ?, ?, 0
## max values : ?, ?, ?, ?, ?, 1
model_3 <- ENMevaluate(occ = coordinates(x_trop_all_cleaned),
env=NR_stack_m3,
bg.coords =background_T ,occ.grp = random1$occ.grp ,
bg.grp = bg.grp, RMvalues =1,
fc=c('L','LQ','LP','LQP'),
method='user', categoricals = 'forest',
algorithm = 'maxent.jar',clamp = TRUE,rasterPreds = TRUE,
parallel = FALSE,progbar = TRUE, updateProgress = TRUE)
##
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
#This will be a table with AICc, and corresponding AUCs for each model
model_3@results
## settings features rm train.AUC avg.test.AUC var.test.AUC avg.diff.AUC
## 1 L_1 L 1 0.6456 0.6296783 0.13383009 0.06127186
## 2 LQ_1 LQ 1 0.6617 0.6263107 0.13678796 0.07355277
## 3 LP_1 LP 1 0.6722 0.6496228 0.08908823 0.05524363
## 4 LQP_1 LQP 1 0.6858 0.6538445 0.10347159 0.06328535
## var.diff.AUC avg.test.orMTP var.test.orMTP avg.test.or10pct var.test.or10pct
## 1 0.07770363 0.01428571 0.0009070295 0.1148352 0.01718425
## 2 0.09009733 0.01428571 0.0020408163 0.1368132 0.01754653
## 3 0.05480550 0.01428571 0.0009070295 0.1076923 0.01044694
## 4 0.07082120 0.01428571 0.0020408163 0.1368132 0.01868031
## AICc delta.AICc w.AIC parameters
## 1 3135.317 23.88832 6.480276e-06 5
## 2 3124.806 13.37708 1.241882e-03 8
## 3 3124.660 13.23165 1.335550e-03 12
## 4 3111.429 0.00000 9.974161e-01 12
#Now retaining the model with the lowest AICc
aic.opt3 <- model_3@models[[which(model_3@results$delta.AICc==0)]]
aic.opt3@results
## [,1]
## X.Training.samples 139.0000
## Regularized.training.gain 0.1670
## Unregularized.training.gain 0.2329
## Iterations 500.0000
## Training.AUC 0.6858
## X.Background.points 2000.0000
## bio06.contribution 6.7752
## bio10.contribution 9.3042
## bio14.contribution 37.1510
## bio15.contribution 23.5890
## bio16.contribution 20.9991
## forest.contribution 2.1816
## bio06.permutation.importance 19.9052
## bio10.permutation.importance 8.4572
## bio14.permutation.importance 51.4989
## bio15.permutation.importance 13.3732
## bio16.permutation.importance 6.7656
## forest.permutation.importance 0.0000
## Entropy 7.4331
## Prevalence..average.probability.of.presence.over.background.sites. 0.5225
## Fixed.cumulative.value.1.cumulative.threshold 1.0000
## Fixed.cumulative.value.1.Cloglog.threshold 0.2552
## Fixed.cumulative.value.1.area 0.9685
## Fixed.cumulative.value.1.training.omission 0.0000
## Fixed.cumulative.value.5.cumulative.threshold 5.0000
## Fixed.cumulative.value.5.Cloglog.threshold 0.3191
## Fixed.cumulative.value.5.area 0.8695
## Fixed.cumulative.value.5.training.omission 0.0576
## Fixed.cumulative.value.10.cumulative.threshold 10.0000
## Fixed.cumulative.value.10.Cloglog.threshold 0.3700
## Fixed.cumulative.value.10.area 0.7685
## Fixed.cumulative.value.10.training.omission 0.0863
## Minimum.training.presence.cumulative.threshold 1.4672
## Minimum.training.presence.Cloglog.threshold 0.2652
## Minimum.training.presence.area 0.9550
## Minimum.training.presence.training.omission 0.0000
## X10.percentile.training.presence.cumulative.threshold 12.7232
## X10.percentile.training.presence.Cloglog.threshold 0.3927
## X10.percentile.training.presence.area 0.7210
## X10.percentile.training.presence.training.omission 0.0935
## Equal.training.sensitivity.and.specificity.cumulative.threshold 39.0615
## Equal.training.sensitivity.and.specificity.Cloglog.threshold 0.5761
## Equal.training.sensitivity.and.specificity.area 0.3815
## Equal.training.sensitivity.and.specificity.training.omission 0.3813
## Maximum.training.sensitivity.plus.specificity.cumulative.threshold 29.4992
## Maximum.training.sensitivity.plus.specificity.Cloglog.threshold 0.5112
## Maximum.training.sensitivity.plus.specificity.area 0.4840
## Maximum.training.sensitivity.plus.specificity.training.omission 0.2374
## Balance.training.omission..predicted.area.and.threshold.value.cumulative.threshold 1.4672
## Balance.training.omission..predicted.area.and.threshold.value.Cloglog.threshold 0.2652
## Balance.training.omission..predicted.area.and.threshold.value.area 0.9550
## Balance.training.omission..predicted.area.and.threshold.value.training.omission 0.0000
## Equate.entropy.of.thresholded.and.original.distributions.cumulative.threshold 6.1184
## Equate.entropy.of.thresholded.and.original.distributions.Cloglog.threshold 0.3296
## Equate.entropy.of.thresholded.and.original.distributions.area 0.8455
## Equate.entropy.of.thresholded.and.original.distributions.training.omission 0.0647
#Here we are looking at the importance of each variable
var.importance(aic.opt3)
## variable percent.contribution permutation.importance
## 1 bio06 6.7752 19.9052
## 2 bio10 9.3042 8.4572
## 3 bio14 37.1510 51.4989
## 4 bio15 23.5890 13.3732
## 5 bio16 20.9991 6.7656
## 6 forest 2.1816 0.0000
#These are the coefficients for each variable in the model.
parse_lambdas(aic.opt3)
##
## Features with non-zero weights
##
## feature lambda min max type
## bio06 -8.8366 9.772e+00 22.8 linear
## bio16 1.0076 3.940e+02 2405.0 linear
## bio15^2 9.0405 1.494e+03 25711.2 quadratic
## bio16^2 0.8726 1.552e+05 5784025.0 quadratic
## bio06*bio10 7.3732 2.207e+02 664.0 product
## bio06*bio14 -4.2611 0.000e+00 2396.2 product
## bio06*bio15 3.6533 7.402e+02 2754.5 product
## bio10*bio14 3.9265 0.000e+00 3148.8 product
## bio10*bio15 -12.8895 1.014e+03 4554.9 product
## bio14*bio15 8.0329 0.000e+00 5778.0 product
## bio14*bio16 -5.6401 0.000e+00 142346.0 product
## bio15*bio16 0.3674 2.352e+04 323752.8 product
##
##
## Features with zero weights
##
## feature lambda min max type
## (forest==0.0) 0 0.00 1.00 categorical
## bio10 0 19.99 32.03 linear
## bio14 0 0.00 120.00 linear
## bio15 0 38.66 160.35 linear
#Lastly, the response curves for the best model
response(aic.opt3)
x_trop_all_cleaned <- read.csv('data/xenopus_raster_values.csv')
coordinates(x_trop_all_cleaned) <- ~ decimalLongitude + decimalLatitude
crs(x_trop_all_cleaned) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
#Predicting relative probability over the accessible area
NR_pred_3 <- (predict(aic.opt3,NR_stack_m3))
plot(NR_pred_3)
#Predicting over Florida
FL_pred_3 <- (predict(aic.opt3, FL_stack_m3))
plot(FL_pred_3)
#Setting the threshold for presence absence to 5%
m3_thresh <- data.frame(raster::extract(NR_pred_3, coordinates(x_trop_all_cleaned)))
m3_thresh <- na.omit(m3_thresh)
colnames(m3_thresh) <- 'probability'
quantile(m3_thresh$probability, 0.05)
## 5%
## 0.3139689
FL_pred_3_thresholded <- FL_pred_3
FL_pred_3_thresholded[FL_pred_3_thresholded >= aic.opt3@results[46] ] = 3
FL_pred_3_thresholded[FL_pred_3_thresholded >= aic.opt3@results[38] & FL_pred_3_thresholded < aic.opt3@results[46] ] = 2
FL_pred_3_thresholded[FL_pred_3_thresholded >= quantile(m3_thresh$probability, 0.05) & FL_pred_3_thresholded < aic.opt3@results[38] ] = 1
FL_pred_3_thresholded[FL_pred_3_thresholded < quantile(m3_thresh$probability, 0.05) ] = 0
#Plotting the thresholded model
plot(FL_pred_3_thresholded)
# The actual model
NR_stack_m4
## class : RasterStack
## dimensions : 310, 724, 224440, 7 (nrow, ncol, ncell, nlayers)
## resolution : 0.04166667, 0.04166667 (x, y)
## extent : -17.54167, 12.625, 2.291667, 15.20833 (xmin, xmax, ymin, ymax)
## crs : +proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0
## names : bio2, bio4, bio06, bio11, bio15, bio16, bio17
model_4 <- ENMevaluate(occ = coordinates(x_trop_all_cleaned),
env=NR_stack_m4,
bg.coords =background_T ,occ.grp = random1$occ.grp ,
bg.grp = bg.grp, RMvalues =1,
fc=c('L','LQ','LP','LQP'),
method='user',
algorithm = 'maxent.jar',clamp = TRUE,rasterPreds = TRUE,
parallel = FALSE,progbar = TRUE, updateProgress = TRUE)
##
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
#This will be a table with AICc, and corresponding AUCs for each model
model_4@results
## settings features rm train.AUC avg.test.AUC var.test.AUC avg.diff.AUC
## 1 L_1 L 1 0.6972 0.6862505 0.06726086 0.04257800
## 2 LQ_1 LQ 1 0.7214 0.7001926 0.06874197 0.05004751
## 3 LP_1 LP 1 0.7503 0.7213130 0.04570191 0.04606394
## 4 LQP_1 LQP 1 0.7467 0.7114385 0.06234145 0.05383243
## var.diff.AUC avg.test.orMTP var.test.orMTP avg.test.or10pct var.test.or10pct
## 1 0.04100704 0.000000000 0.0000000000 0.1214286 0.013662132
## 2 0.04852184 0.014285714 0.0020408163 0.1148352 0.011515316
## 3 0.03515968 0.007142857 0.0005102041 0.1076923 0.008179366
## 4 0.05011068 0.007142857 0.0005102041 0.1076923 0.012714514
## AICc delta.AICc w.AIC parameters
## 1 3099.291 27.363396 9.942623e-07 6
## 2 3088.541 16.614312 2.146026e-04 10
## 3 3071.927 0.000000 8.697350e-01 17
## 4 3075.728 3.800548 1.300494e-01 19
#Now retaining the model with the lowest AICc
aic.opt4 <- model_4@models[[which(model_4@results$delta.AICc==0)]]
aic.opt4@results
## [,1]
## X.Training.samples 139.0000
## Regularized.training.gain 0.4158
## Unregularized.training.gain 0.4833
## Iterations 500.0000
## Training.AUC 0.7503
## X.Background.points 2000.0000
## bio06.contribution 6.0628
## bio11.contribution 10.9602
## bio15.contribution 9.4240
## bio16.contribution 8.8258
## bio17.contribution 17.1500
## bio2.contribution 20.1481
## bio4.contribution 27.4291
## bio06.permutation.importance 12.5048
## bio11.permutation.importance 4.7848
## bio15.permutation.importance 11.6010
## bio16.permutation.importance 6.2969
## bio17.permutation.importance 34.1247
## bio2.permutation.importance 18.1242
## bio4.permutation.importance 12.5637
## Entropy 7.1865
## Prevalence..average.probability.of.presence.over.background.sites. 0.4000
## Fixed.cumulative.value.1.cumulative.threshold 1.0000
## Fixed.cumulative.value.1.Cloglog.threshold 0.1372
## Fixed.cumulative.value.1.area 0.9305
## Fixed.cumulative.value.1.training.omission 0.0144
## Fixed.cumulative.value.5.cumulative.threshold 5.0000
## Fixed.cumulative.value.5.Cloglog.threshold 0.2390
## Fixed.cumulative.value.5.area 0.8050
## Fixed.cumulative.value.5.training.omission 0.0504
## Fixed.cumulative.value.10.cumulative.threshold 10.0000
## Fixed.cumulative.value.10.Cloglog.threshold 0.2950
## Fixed.cumulative.value.10.area 0.6995
## Fixed.cumulative.value.10.training.omission 0.0935
## Minimum.training.presence.cumulative.threshold 0.0718
## Minimum.training.presence.Cloglog.threshold 0.0499
## Minimum.training.presence.area 0.9850
## Minimum.training.presence.training.omission 0.0000
## X10.percentile.training.presence.cumulative.threshold 13.6050
## X10.percentile.training.presence.Cloglog.threshold 0.3195
## X10.percentile.training.presence.area 0.6350
## X10.percentile.training.presence.training.omission 0.0935
## Equal.training.sensitivity.and.specificity.cumulative.threshold 37.6878
## Equal.training.sensitivity.and.specificity.Cloglog.threshold 0.4504
## Equal.training.sensitivity.and.specificity.area 0.3095
## Equal.training.sensitivity.and.specificity.training.omission 0.3094
## Maximum.training.sensitivity.plus.specificity.cumulative.threshold 34.8433
## Maximum.training.sensitivity.plus.specificity.Cloglog.threshold 0.4379
## Maximum.training.sensitivity.plus.specificity.area 0.3420
## Maximum.training.sensitivity.plus.specificity.training.omission 0.2302
## Balance.training.omission..predicted.area.and.threshold.value.cumulative.threshold 0.0718
## Balance.training.omission..predicted.area.and.threshold.value.Cloglog.threshold 0.0499
## Balance.training.omission..predicted.area.and.threshold.value.area 0.9850
## Balance.training.omission..predicted.area.and.threshold.value.training.omission 0.0000
## Equate.entropy.of.thresholded.and.original.distributions.cumulative.threshold 12.1530
## Equate.entropy.of.thresholded.and.original.distributions.Cloglog.threshold 0.3100
## Equate.entropy.of.thresholded.and.original.distributions.area 0.6605
## Equate.entropy.of.thresholded.and.original.distributions.training.omission 0.0935
#Here we are looking at the importance of each variable
var.importance(aic.opt4)
## variable percent.contribution permutation.importance
## 1 bio06 6.0628 12.5048
## 2 bio11 10.9602 4.7848
## 3 bio15 9.4240 11.6010
## 4 bio16 8.8258 6.2969
## 5 bio17 17.1500 34.1247
## 6 bio2 20.1481 18.1242
## 7 bio4 27.4291 12.5637
#These are the coefficients for each variable in the model.
parse_lambdas(aic.opt4)
##
## Features with non-zero weights
##
## feature lambda min max type
## bio06 0.25602 9.772 22.80 linear
## bio11 -2.69808 16.309 27.91 linear
## bio2 8.21237 6.128 16.79 linear
## bio06*bio16 1.44891 6509.584 52527.38 product
## bio06*bio17 0.18531 0.000 8945.66 product
## bio06*bio2 -0.45070 107.015 241.55 product
## bio06*bio4 6.76774 793.004 4067.63 product
## bio11*bio4 -0.07104 1407.320 6088.96 product
## bio15*bio16 6.36461 23521.858 323752.78 product
## bio15*bio17 6.95965 0.000 23168.02 product
## bio15*bio2 3.24707 296.948 2326.00 product
## bio16*bio17 -4.24404 0.000 570766.00 product
## bio16*bio2 -0.85607 3348.035 26071.86 product
## bio16*bio4 -6.64701 42338.120 433288.82 product
## bio17*bio2 -2.37030 0.000 4132.99 product
## bio17*bio4 5.11139 0.000 51811.45 product
## bio2*bio4 -11.36027 522.607 3456.82 product
##
##
## Features with zero weights
##
## feature lambda min max type
## bio15 0 38.66 160.3 linear
## bio16 0 394.00 2405.0 linear
## bio17 0 0.00 448.0 linear
## bio4 0 57.24 252.4 linear
#Lastly, the response curves for the best model
response(aic.opt4)
x_trop_all_cleaned <- read.csv('data/xenopus_raster_values.csv')
coordinates(x_trop_all_cleaned) <- ~ decimalLongitude + decimalLatitude
crs(x_trop_all_cleaned) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
#Predicting relative probability over the accessible area
NR_pred_4 <- (predict(aic.opt4,NR_stack_m4))
plot(NR_pred_4)
#Predicting over Florida
FL_pred_4 <- (predict(aic.opt4, FL_stack_m4))
plot(FL_pred_4)
#Setting the threshold for presence absence to 5%
m4_thresh <- data.frame(raster::extract(NR_pred_4, coordinates(x_trop_all_cleaned)))
m4_thresh <- na.omit(m4_thresh)
colnames(m4_thresh) <- 'probability'
quantile(m4_thresh$probability, 0.05)
## 5%
## 0.2584182
FL_pred_4_thresholded <- FL_pred_4
FL_pred_4_thresholded[FL_pred_4_thresholded >= aic.opt4@results[48] ] = 3
FL_pred_4_thresholded[FL_pred_4_thresholded >= aic.opt4@results[40] & FL_pred_4_thresholded < aic.opt4@results[48] ] = 2
FL_pred_4_thresholded[FL_pred_4_thresholded >= quantile(m4_thresh$probability, 0.05) & FL_pred_4_thresholded < aic.opt4@results[40] ] = 1
FL_pred_4_thresholded[FL_pred_4_thresholded < quantile(m4_thresh$probability, 0.05) ] = 0
#Plotting the thresholded model
plot(FL_pred_4_thresholded)