Covariates extraction (Baseline and Projected)

We first downloaded the shape file of Ghana map using the getData() function of the package raster.

# Set working directory
setwd("~/Cours Master BIOSTAT/M2/Species_Distribution_Modeling/Evaluation")

# Important packages 
library(dismo)
library(rgdal)

gha_data <- getData('GADM', country='GHA', level=0)
plot(gha_data, col = 'light yellow', main = "Ghana Map")

We then import the Baseline environmental spatial data . Those files are Geotif files downloaded on the York University web site for african continent.

# Import recent bio data from Africlimat
bio12<- raster('Present/bio12_wc30s.tif')

bio15 <- raster('Present/bio15_wc30s.tif')

mi <- raster('Present/mi_wc30s.tif')

dm <- raster('Present/dm_wc30s.tif')

llds <- raster('Present/llds_wc30s.tif')

climAfr <- stack(bio12, bio15, mi, dm,llds)
names(climAfr) <- c("bio12", "bio15", "mi", "dm", "llds")
plot(climAfr)

Cropping for Ghana Map

We are interested by the spatial data for Ghana country. We then cropped the Africa data associated to Ghana.

bio12G2<- mask(crop(bio12, gha_data), gha_data)

bio15gha <- mask(crop(bio15, gha_data), gha_data)

migha <- mask(crop(mi, gha_data), gha_data)

dmgha <- mask(crop(dm, gha_data), gha_data)

lldsgha <- mask(crop(llds, gha_data), gha_data)

# Stack environmental data 
presence <- stack(bio12G2, bio15gha, migha, dmgha, lldsgha)
names(presence) <- c("bio12", "bio15", "mi", "dm", "llds")
plot(presence)

Projected or future Climatic Data

We did the same procedure for the projected data gotten in the same website.

# Reading the future/projected data
bio12F<- mask(crop(raster('PredAfriclimAT/bio12_rcp45.tif'),
                    gha_data), gha_data)
bio15F <- mask(crop(raster('PredAfriclimAT/bio15_rcp45.tif'),
                    gha_data), gha_data)
miF <- mask(crop(raster('PredAfriclimAT/mi_rcp45.tif'),gha_data),
            gha_data)
dmF <- mask(crop(raster('PredAfriclimAT/dm_rcp45.tif'),gha_data),
            gha_data)
lldsF <- mask(crop(raster('PredAfriclimAT/llds_rcp45.tif'),
                   gha_data), gha_data)

future <- stack(bio12F, bio15F, miF, dmF,lldsF)
names(future) <- c("bio12", "bio15", "mi", "dm", "llds")
plot(future)

Species Presence Prediction

In this section we presented the main results attached with R code.

Importing the database of presence

We first import the presence (occurrences) spatial data of the concerned specie in Ghana country. The data set was cleaned so that we removed the empty data and non geo referenced occurrences We plotted the presences of the specie on the map as bellow.

# reading the data from the csv file
dataset <- read.csv('PresenceAbs/Strombosia.csv', header = T, sep = '\t')
names(dataset)
##  [1] "gbifID"                           "datasetKey"                      
##  [3] "occurrenceID"                     "kingdom"                         
##  [5] "phylum"                           "class"                           
##  [7] "order"                            "family"                          
##  [9] "genus"                            "species"                         
## [11] "infraspecificEpithet"             "taxonRank"                       
## [13] "scientificName"                   "verbatimScientificName"          
## [15] "verbatimScientificNameAuthorship" "countryCode"                     
## [17] "locality"                         "stateProvince"                   
## [19] "occurrenceStatus"                 "individualCount"                 
## [21] "publishingOrgKey"                 "decimalLatitude"                 
## [23] "decimalLongitude"                 "coordinateUncertaintyInMeters"   
## [25] "coordinatePrecision"              "elevation"                       
## [27] "elevationAccuracy"                "depth"                           
## [29] "depthAccuracy"                    "eventDate"                       
## [31] "day"                              "month"                           
## [33] "year"                             "taxonKey"                        
## [35] "speciesKey"                       "basisOfRecord"                   
## [37] "institutionCode"                  "collectionCode"                  
## [39] "catalogNumber"                    "recordNumber"                    
## [41] "identifiedBy"                     "dateIdentified"                  
## [43] "license"                          "rightsHolder"                    
## [45] "recordedBy"                       "typeStatus"                      
## [47] "establishmentMeans"               "lastInterpreted"                 
## [49] "mediaType"                        "issue"
# Data cleaning
dataset <- dataset[,c(13,23,22)]
names(dataset)[2:3] <- c('lon','lat')
dataset <- subset(dataset, (!is.na(lon) | !is.na(lat)) & (lon!= 0 & lat != 0)) 
dataset <- subset(dataset[!duplicated(dataset[,2:3]),], lon < 1 & lon > - 3.2)
dataKhaya <- dataset
species <- dataKhaya[1,1]

# ---Or use Ghana map 
plot(gha_data, col = 'light yellow', main = paste(species,"Presences in Ghana"))
points(dataset$lon, dataset$lat, col="orange",pch=20, cex=1)

Extracting the environmental conditions

We use the baseline environmental data to extract the environment conditions of the specie presence.

# We check the crs format of the covariates and the species coordinates
presence@crs
## Coordinate Reference System:
## Deprecated Proj.4 representation: +proj=longlat +datum=WGS84 +no_defs 
## WKT2 2019 representation:
## GEOGCRS["WGS 84 (with axis order normalized for visualization)",
##     DATUM["World Geodetic System 1984",
##         ELLIPSOID["WGS 84",6378137,298.257223563,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["geodetic longitude (Lon)",east,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433,
##                 ID["EPSG",9122]]],
##         AXIS["geodetic latitude (Lat)",north,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433,
##                 ID["EPSG",9122]]]]

The baseline data had the same CRS than the species coordinates (decimal coordinates). We do not need to transform the two formats.

We could extract the raster information into a database as following:

datasetC <- dataset
dataset <- dataset[,2:3]
coordinates(dataset) <- c('lon', 'lat')
proj4string(dataset) <- CRS("+proj=longlat +datum=WGS84 +no_defs +
                            ellps=WGS84 +towgs84=0,0,0")

# Extraction -----------------------------------------------------
habGha_data <- extract(presence, dataset)
habGha_data <- cbind.data.frame(dataset, as.data.frame(habGha_data))

summary(habGha_data)
##       lon              lat            bio12          bio15       
##  Min.   :-3.055   Min.   :4.750   Min.   : 978   Min.   : 50.00  
##  1st Qu.:-2.857   1st Qu.:5.479   1st Qu.:1244   1st Qu.: 60.75  
##  Median :-2.560   Median :6.366   Median :1438   Median : 65.00  
##  Mean   :-2.220   Mean   :6.240   Mean   :1490   Mean   : 72.43  
##  3rd Qu.:-2.199   3rd Qu.:6.690   3rd Qu.:1666   3rd Qu.: 73.25  
##  Max.   : 0.550   Max.   :7.609   Max.   :2006   Max.   :118.00  
##        mi               dm             llds      
##  Min.   : 63.00   Min.   :1.000   Min.   :1.000  
##  1st Qu.: 76.00   1st Qu.:2.000   1st Qu.:2.000  
##  Median : 88.00   Median :3.000   Median :3.000  
##  Mean   : 95.21   Mean   :2.893   Mean   :2.821  
##  3rd Qu.:115.00   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :136.00   Max.   :5.000   Max.   :4.000
# Extract the center of each cell
myxy <- xyFromCell(presence[[1]], cell = 1:ncell(presence[[1]]))
# Get all values of the environmental variables
myvar <- getValues(presence)
# create a database with location and the values for environmental variables
preEnv <- na.omit(cbind.data.frame(myxy,myvar))

# presence 'absence' matrix
pa <-c(rep(1,nrow(habGha_data)),rep(0,nrow(preEnv)))

# model data
model_data <- rbind.data.frame(habGha_data[,3:7],preEnv[,3:7])

We use the maxent() function for the Maximum Entropy species distribution model from the dismo package. Once the model obtained, we predicted the suitability of the baseline environmental covariates locations. We use the following code:

# MaxEnt prediction -------------------------------------------
# Model for actual data

MSWD <- maxent(x=model_data, p=pa, path=paste(getwd(),"/MaxentResult", sep=""),
               args=c("-P", "noautofeature", "nothreshold", "noproduct",
                      paste("maximumbackground=",nrow(preEnv), sep=""), "noaddsamplestobackground"))
## This is MaxEnt version 3.4.3
# visualisation
plot(MSWD)

response(MSWD)

# Prediction with MaxEnt Model based on extracted data -------------------

MSWD.pred <- predict(MSWD, presence, args="outputformat=raw", sep="",
                     progress="text")
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |======================================================================| 100%
## 
plot(MSWD.pred, main = paste("Suitability of locations in 1975\n for",species))
points(dataset$lon, dataset$lat, col=rgb(0,0,139,255*0.4,maxColorValue = 255),pch=20, cex=1)

Prediction of suitability locations using projected environmental variables

We use the model created bellow to predict the suitability locations in 2055.

# Prediction using the projected data
MSWD.F <- predict(MSWD, future, args="outputformat=raw", sep="",
                 progress="text")
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |======================================================================| 100%
## 
plot(MSWD.F, main = paste("Suitability of locations In 2025\n for",species))
points(dataset$lon, dataset$lat, col="darkblue",pch=20, cex=1)

Combine the two graphs

par(mfrow = c(1,2))
plot(MSWD.pred, main = "BaseLine")
plot(MSWD.F, main = "Projected 2055")

Conclusion