How to extract presences, absences and pseudo absences to produce custom plots

Abstract

This script is designed to show how you can simply extract presences, absemces and pseudo absences from a data.formatted.Biomod.object object (output of BIOMOD_FormatingData()).

This can be particularly useful to produce custom plots of the modelling data used within biomod2 framework.

Script

Data formatting

Here we sill create a toy data.formatted.Biomod.object object using BIOMOD_FormatingData() function.

Here are couple parameters used for the data formatting procedure:

species : Gulo Gulo
explanatory variables: worlclim bioclimatic variables (bio_3, bio_4, bio_7, bio_11 & bio_12)
numbeer of absences: 0
number of pseudo absences: 500
number of pseudo absences sampling repetiton: 2
pseudo absences selection algorithm: random

setwd('D:/georgesd/Documents/_TEMP/')

library(biomod2)

## biomod2 3.3-18 loaded.
## 
## Type browseVignettes(package='biomod2') to access directly biomod2 vignettes.

# species occurrences
DataSpecies <- read.csv(system.file("external/species/mammals_table.csv",
                                    package="biomod2"), row.names = 1)
head(DataSpecies)

##   X_WGS84  Y_WGS84 ConnochaetesGnou GuloGulo PantheraOnca
## 1   -94.5 82.00001                0        0            0
## 2   -91.5 82.00001                0        1            0
## 3   -88.5 82.00001                0        1            0
## 4   -85.5 82.00001                0        1            0
## 5   -82.5 82.00001                0        1            0
## 6   -79.5 82.00001                0        1            0
##   PteropusGiganteus TenrecEcaudatus VulpesVulpes
## 1                 0               0            0
## 2                 0               0            0
## 3                 0               0            0
## 4                 0               0            0
## 5                 0               0            0
## 6                 0               0            0

# the name of studied species
myRespName <- 'GuloGulo'

# the presence/absences data for our species
myRespDF <- DataSpecies[DataSpecies[, myRespName] == 1 ,c("X_WGS84","Y_WGS84", myRespName)]

myResp <- as.numeric(myRespDF[,myRespName])

# the XY coordinates of species data
myRespXY <- myRespDF[,c("X_WGS84","Y_WGS84")]


# Environmental variables extracted from Worldclim (bio_3, bio_4, bio_7, bio_11 & bio_12)
myExpl = raster::stack( system.file( "external/bioclim/current/bio3.grd",
                                     package="biomod2"),
                        system.file( "external/bioclim/current/bio4.grd",
                                     package="biomod2"),
                        system.file( "external/bioclim/current/bio7.grd",
                                     package="biomod2"),
                        system.file( "external/bioclim/current/bio11.grd",
                                     package="biomod2"),
                        system.file( "external/bioclim/current/bio12.grd",
                                     package="biomod2"))

# Formatting data in biomod2 friendly sahpe
myBiomodData <- BIOMOD_FormatingData(resp.var = myResp,
                                     expl.var = myExpl,
                                     resp.xy = myRespXY,
                                     resp.name = myRespName,
                                     PA.nb.rep = 2,
                                     PA.nb.absences = 500)

## 
## -=-=-=-=-=-=-=-=-=-=-=-= GuloGulo Data Formating -=-=-=-=-=-=-=-=-=-=-=-=
## 
##       ! No data has been set aside for modeling evaluation
##    > Pseudo Absences Selection checkings...
##    > random pseudo absences selection
##    > Pseudo absences are selected in explanatory variables
## -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= Done -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

The extraction funcrtions

Here are define 2 small function that will help you to extract the points locations of a data.formatted.Biomod.object

note: This function will be integreted in a next release of biomod2

library(dplyr)

## Warning: package 'dplyr' was built under R version 3.4.4

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

## function to get PA dataset
get_PAtab <- function(bfd){
  dplyr::bind_cols(
    x = bfd@coord[, 1],
    y = bfd@coord[, 2],
    status = bfd@data.species,
    bfd@PA
  )
}

## function to get background mask
get_mask <- function(bfd){
  bfd@data.mask
}

Extract presences, absences and pseudo-absences tables

When you have extracted the PA table from data.formatted.Biomod.object you can easily select presences (filter(status == 1)), pseudo-absences (filter(is.na(status))) or absences (filter(status == 0)) even if no absences are dfined in our case

## get the coordiantes of presences
(pres.xy <- get_PAtab(myBiomodData) %>% 
    filter(status == 1) %>%
    select(x, y))

## Warning: package 'bindrcpp' was built under R version 3.4.4

## # A tibble: 661 x 2
##        x     y
##    <dbl> <dbl>
##  1 -91.5  82.0
##  2 -88.5  82.0
##  3 -85.5  82.0
##  4 -82.5  82.0
##  5 -79.5  82.0
##  6 -76.5  82.0
##  7 -73.5  82.0
##  8 -70.5  82.0
##  9 -67.5  82.0
## 10 -64.5  82.0
## # ... with 651 more rows

## get the coordiantes of absences ==> No absences here
(abs.xy <- get_PAtab(myBiomodData) %>% 
    filter(status == 0) %>%
    select(x, y))

## # A tibble: 0 x 2
## # ... with 2 variables: x <dbl>, y <dbl>

## get the coordiantes of pseudo - absences
## all repetition of pseudo absences sampling merged 
(pa.all.xy <- get_PAtab(myBiomodData) %>% 
    filter(is.na(status)) %>%
    select(x, y)) %>%
    distinct()

## # A tibble: 863 x 2
##        x     y
##    <dbl> <dbl>
##  1 -94.5  82.0
##  2 -58.5  82.0
##  3 -49.5  82.0
##  4 -46.5  82.0
##  5 -37.5  82.0
##  6 -28.5  82.0
##  7 -25.5  82.0
##  8 -22.5  82.0
##  9 -10.5  82.0
## 10  19.5  82.0
## # ... with 853 more rows

## pseudo absences sampling for the first repetition only 
(pa.1.xy <- get_PAtab(myBiomodData) %>% 
    filter(is.na(status) & PA1 == TRUE) %>%
    select(x, y)) %>%
    distinct()

## # A tibble: 500 x 2
##         x     y
##     <dbl> <dbl>
##  1  -94.5  82.0
##  2  -58.5  82.0
##  3  -49.5  82.0
##  4  -46.5  82.0
##  5  -28.5  82.0
##  6  -25.5  82.0
##  7   49.5  82.0
##  8   64.5  82.0
##  9 -116.   79.0
## 10 -112.   79.0
## # ... with 490 more rows

Plot example

Here is an example of plot that could be usefull when resolution is too high and presences could not be diplayed correctly using the data.formatted.Biomod.object plot() function

## plot the first PA selection and add the presences on top
plot(get_mask(myBiomodData)[['PA1']])
points(pres.xy, pch = 11)