Here is a simple reproductible example of how to produce ROC curves within biomod2 modelling framework.
Note 1: Lot of tuning should be done in the plotting part but.
Note 2: Some of this functionalities might be integreated in a next release of biomnod2.
Note 3: The following script is a bit under commentted, feel free to ask if some parts require clarification.
build GuloGulo distribution models (based on example)
get all required data (e.g. species occ data, models predictions, models evaluations lines… )
vompute the roc evaluation for all the models
plot the roc curves
rm(list = ls())
## load the package
library(biomod2)
## biomod2 3.3-18 loaded.
##
## Type browseVignettes(package='biomod2') to access directly biomod2 vignettes.
sessionInfo()
## R version 3.4.0 (2017-04-21)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 10586)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United Kingdom.1252
## [2] LC_CTYPE=English_United Kingdom.1252
## [3] LC_MONETARY=English_United Kingdom.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United Kingdom.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] biomod2_3.3-18
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-131 lubridate_1.7.4 doParallel_1.0.11
## [4] dimRed_0.1.0 RColorBrewer_1.1-2 rprojroot_1.3-2
## [7] tools_3.4.0 backports_1.1.2 R6_2.2.2
## [10] rpart_4.1-13 lazyeval_0.2.1 colorspace_1.3-2
## [13] nnet_7.3-12 raster_2.6-7 withr_2.1.2
## [16] gbm_2.1.3 sp_1.3-1 tidyselect_0.2.4
## [19] mnormt_1.5-5 compiler_3.4.0 mda_0.4-10
## [22] SparseM_1.77 xml2_1.2.0 NLP_0.1-11
## [25] slam_0.1-43 sfsmisc_1.1-2 scales_0.5.0
## [28] DEoptimR_1.0-8 hexbin_1.27.2 tm_0.7-4
## [31] psych_1.8.4 robustbase_0.93-1 randomForest_4.6-14
## [34] plotmo_3.4.1 PresenceAbsence_1.1.9 stringr_1.3.1
## [37] digest_0.6.15 foreign_0.8-70 rmarkdown_1.10
## [40] dismo_1.1-4 pkgconfig_2.0.1 htmltools_0.3.6
## [43] plotrix_3.7-2 rlang_0.2.1 ddalpha_1.3.4
## [46] bindr_0.1.1 zoo_1.8-2 dplyr_0.7.5
## [49] ModelMetrics_1.1.0 magrittr_1.5 Matrix_1.2-9
## [52] Rcpp_0.12.17 munsell_0.5.0 abind_1.4-5
## [55] stringi_1.1.7 pROC_1.12.1 yaml_2.1.19
## [58] MASS_7.3-47 plyr_1.8.4 recipes_0.1.3
## [61] grid_3.4.0 pls_2.6-0 parallel_3.4.0
## [64] earth_4.6.3 rasterVis_0.45 lattice_0.20-35
## [67] splines_3.4.0 knitr_1.20 pillar_1.2.3
## [70] stats4_3.4.0 reshape2_1.4.3 codetools_0.2-15
## [73] CVST_0.2-2 magic_1.5-8 glue_1.2.0
## [76] evaluate_0.10.1 latticeExtra_0.6-28 foreach_1.4.4
## [79] gtable_0.2.0 purrr_0.2.5 tidyr_0.8.1
## [82] kernlab_0.9-26 reshape_0.8.7 assertthat_0.2.0
## [85] DRR_0.0.3 ggplot2_2.2.1 TeachingDemos_2.10
## [88] gower_0.1.2 prodlim_2018.04.18 maxent_1.3.3.1
## [91] broom_0.4.4 class_7.3-14 survival_2.41-3
## [94] viridisLite_0.3.0 geometry_0.3-6 timeDate_3043.102
## [97] RcppRoll_0.3.0 tibble_1.4.2 rJava_0.9-10
## [100] iterators_1.0.9 lava_1.6.1 bindrcpp_0.2.2
## [103] ENMeval_0.2.2 caret_6.0-80 ipred_0.9-6
## use the BIOMOD_Modeling example as basis
example(BIOMOD_Modeling)
##
## BIOMOD> # species occurrences
## BIOMOD> DataSpecies <- read.csv(system.file("external/species/mammals_table.csv",
## BIOMOD+ package="biomod2"))
##
## BIOMOD> head(DataSpecies)
## X X_WGS84 Y_WGS84 ConnochaetesGnou GuloGulo PantheraOnca
## 1 1 -94.5 82.00001 0 0 0
## 2 2 -91.5 82.00001 0 1 0
## 3 3 -88.5 82.00001 0 1 0
## 4 4 -85.5 82.00001 0 1 0
## 5 5 -82.5 82.00001 0 1 0
## 6 6 -79.5 82.00001 0 1 0
## PteropusGiganteus TenrecEcaudatus VulpesVulpes
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
##
## BIOMOD> # the name of studied species
## BIOMOD> myRespName <- 'GuloGulo'
##
## BIOMOD> # the presence/absences data for our species
## BIOMOD> myResp <- as.numeric(DataSpecies[,myRespName])
##
## BIOMOD> # the XY coordinates of species data
## BIOMOD> myRespXY <- DataSpecies[,c("X_WGS84","Y_WGS84")]
##
## BIOMOD> # Environmental variables extracted from BIOCLIM (bio_3, bio_4, bio_7, bio_11 & bio_12)
## BIOMOD> myExpl = raster::stack( system.file( "external/bioclim/current/bio3.grd",
## BIOMOD+ package="biomod2"),
## BIOMOD+ system.file( "external/bioclim/current/bio4.grd",
## BIOMOD+ package="biomod2"),
## BIOMOD+ system.file( "external/bioclim/current/bio7.grd",
## BIOMOD+ package="biomod2"),
## BIOMOD+ system.file( "external/bioclim/current/bio11.grd",
## BIOMOD+ package="biomod2"),
## BIOMOD+ system.file( "external/bioclim/current/bio12.grd",
## BIOMOD+ package="biomod2"))
##
## BIOMOD> # 1. Formatting Data
## BIOMOD> myBiomodData <- BIOMOD_FormatingData(resp.var = myResp,
## BIOMOD+ expl.var = myExpl,
## BIOMOD+ resp.xy = myRespXY,
## BIOMOD+ resp.name = myRespName)
##
## -=-=-=-=-=-=-=-=-=-=-=-= GuloGulo Data Formating -=-=-=-=-=-=-=-=-=-=-=-=
##
## > No pseudo absences selection !
## ! No data has been set aside for modeling evaluation
## -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= Done -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
##
## BIOMOD> # 2. Defining Models Options using default options.
## BIOMOD> myBiomodOption <- BIOMOD_ModelingOptions()
##
## BIOMOD> # 3. Doing Modelisation
## BIOMOD>
## BIOMOD> myBiomodModelOut <- BIOMOD_Modeling( myBiomodData,
## BIOMOD+ models = c('SRE','RF'),
## BIOMOD+ models.options = myBiomodOption,
## BIOMOD+ NbRunEval=2,
## BIOMOD+ DataSplit=80,
## BIOMOD+ VarImport=0,
## BIOMOD+ models.eval.meth = c('TSS','ROC'),
## BIOMOD+ do.full.models=FALSE,
## BIOMOD+ modeling.id="test")
##
##
## Loading required library...
##
## Checking Models arguments...
##
## Creating suitable Workdir...
##
## > No weights : all observations will have the same weight
##
##
## -=-=-=-=-=-=-=-=-=-=-=-= GuloGulo Modeling Summary -=-=-=-=-=-=-=-=-=-=-=-=
##
## 5 environmental variables ( bio3 bio4 bio7 bio11 bio12 )
## Number of evaluation repetitions : 2
## Models selected : SRE RF
##
## Total number of model runs : 4
##
## -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
##
##
## -=-=-=- Run : GuloGulo_AllData
##
##
## -=-=-=--=-=-=- GuloGulo_AllData_RUN1
##
## Model=Surface Range Envelop
## Evaluating Model stuff...
## Model=Breiman and Cutler's random forests for classification and regression
## Evaluating Model stuff...
##
## -=-=-=--=-=-=- GuloGulo_AllData_RUN2
##
## Model=Surface Range Envelop
## Evaluating Model stuff...
## Model=Breiman and Cutler's random forests for classification and regression
## Evaluating Model stuff...
## -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= Done -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
##
## BIOMOD> ## print a summary of modeling stuff
## BIOMOD> myBiomodModelOut
##
## -=-=-=-=-=-=-=-=-=-=-=-=-=-= BIOMOD.models.out -=-=-=-=-=-=-=-=-=-=-=-=-=-=
##
## Modeling id : test
##
## Species modeled : GuloGulo
##
## Considered variables : bio3 bio4 bio7 bio11 bio12
##
##
## Computed Models : GuloGulo_AllData_RUN1_SRE GuloGulo_AllData_RUN1_RF
## GuloGulo_AllData_RUN2_SRE GuloGulo_AllData_RUN2_RF
##
##
## Failed Models : none
##
## -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
ls()
## [1] "DataSpecies" "myBiomodData" "myBiomodModelOut"
## [4] "myBiomodOption" "myExpl" "myResp"
## [7] "myRespName" "myRespXY"
## formale response variable
form.dat <- get_formal_data(myBiomodModelOut, 'resp.var')
length(form.dat)
## [1] 2488
## models predictions
pred.val <- get_predictions(myBiomodModelOut)
dim(pred.val)
## [1] 2488 2 2 1
dimnames(pred.val)
## [[1]]
## NULL
##
## [[2]]
## [1] "SRE" "RF"
##
## [[3]]
## [1] "RUN1" "RUN2"
##
## [[4]]
## GuloGulo_AllData
## "AllData"
## calibration lines
calib.lines <- get_calib_lines(myBiomodModelOut)
dim(calib.lines)
## [1] 2488 2 1
dimnames(calib.lines)
## [[1]]
## NULL
##
## [[2]]
## [1] "_RUN1" "_RUN2"
##
## [[3]]
## [1] "_AllData"
## get all combination of model, cv run, pa data
model.comb <-
expand.grid(
mod = dimnames(pred.val)[[2]],
cv = dimnames(pred.val)[[3]],
pa = dimnames(pred.val)[[4]],
stringsAsFactors = FALSE
)
## compute all the roc cuurves
mod.roc <-
lapply(
1:nrow(model.comb),
function(i){
mod <- model.comb$mod[i]
cv <- model.comb$cv[i]
pa <- model.comb$pa[i]
eval.lines <- !calib.lines[, paste0('_', cv), paste0('_', pa)]
resp <- form.dat[eval.lines]
pred <- pred.val[eval.lines, mod, cv, pa] / 1000
pROC::roc(resp, pred)
}
)
## plot roc curves
par(mfrow = c(2,2))
lapply(mod.roc, plot)
## [[1]]
##
## Call:
## roc.default(response = resp, predictor = pred)
##
## Data: pred in 365 controls (resp 0) < 132 cases (resp 1).
## Area under the curve: 0.8699
##
## [[2]]
##
## Call:
## roc.default(response = resp, predictor = pred)
##
## Data: pred in 365 controls (resp 0) < 132 cases (resp 1).
## Area under the curve: 0.9919
##
## [[3]]
##
## Call:
## roc.default(response = resp, predictor = pred)
##
## Data: pred in 365 controls (resp 0) < 132 cases (resp 1).
## Area under the curve: 0.8444
##
## [[4]]
##
## Call:
## roc.default(response = resp, predictor = pred)
##
## Data: pred in 365 controls (resp 0) < 132 cases (resp 1).
## Area under the curve: 0.9845