We first downloaded the shape file of Ghana map using the getData() function of the package raster.
# Set working directory
setwd("~/Cours Master BIOSTAT/M2/Species_Distribution_Modeling/Evaluation")
# Important packages
library(dismo)
library(rgdal)
gha_data <- getData('GADM', country='GHA', level=0)
plot(gha_data, col = 'light yellow', main = "Ghana Map")
We then import the Baseline environmental spatial data . Those files are Geotif files downloaded on the York University web site for african continent.
# Import recent bio data from Africlimat
bio12<- raster('Present/bio12_wc30s.tif')
bio15 <- raster('Present/bio15_wc30s.tif')
mi <- raster('Present/mi_wc30s.tif')
dm <- raster('Present/dm_wc30s.tif')
llds <- raster('Present/llds_wc30s.tif')
climAfr <- stack(bio12, bio15, mi, dm,llds)
names(climAfr) <- c("bio12", "bio15", "mi", "dm", "llds")
plot(climAfr)
Cropping for Ghana Map
We are interested by the spatial data for Ghana country. We then cropped the Africa data associated to Ghana.
bio12G2<- mask(crop(bio12, gha_data), gha_data)
bio15gha <- mask(crop(bio15, gha_data), gha_data)
migha <- mask(crop(mi, gha_data), gha_data)
dmgha <- mask(crop(dm, gha_data), gha_data)
lldsgha <- mask(crop(llds, gha_data), gha_data)
# Stack environmental data
presence <- stack(bio12G2, bio15gha, migha, dmgha, lldsgha)
names(presence) <- c("bio12", "bio15", "mi", "dm", "llds")
plot(presence)
We did the same procedure for the projected data gotten in the same website.
# Reading the future/projected data
bio12F<- mask(crop(raster('PredAfriclimAT/bio12_rcp45.tif'),
gha_data), gha_data)
bio15F <- mask(crop(raster('PredAfriclimAT/bio15_rcp45.tif'),
gha_data), gha_data)
miF <- mask(crop(raster('PredAfriclimAT/mi_rcp45.tif'),gha_data),
gha_data)
dmF <- mask(crop(raster('PredAfriclimAT/dm_rcp45.tif'),gha_data),
gha_data)
lldsF <- mask(crop(raster('PredAfriclimAT/llds_rcp45.tif'),
gha_data), gha_data)
future <- stack(bio12F, bio15F, miF, dmF,lldsF)
names(future) <- c("bio12", "bio15", "mi", "dm", "llds")
plot(future)
In this section we presented the main results attached with R code.
We first import the presence (occurrences) spatial data of the concerned specie in Ghana country. The data set was cleaned so that we removed the empty data and non geo referenced occurrences We plotted the presences of the specie on the map as bellow.
# reading the data from the csv file
dataset <- read.csv('PresenceAbs/Strombosia.csv', header = T, sep = '\t')
names(dataset)
## [1] "gbifID" "datasetKey"
## [3] "occurrenceID" "kingdom"
## [5] "phylum" "class"
## [7] "order" "family"
## [9] "genus" "species"
## [11] "infraspecificEpithet" "taxonRank"
## [13] "scientificName" "verbatimScientificName"
## [15] "verbatimScientificNameAuthorship" "countryCode"
## [17] "locality" "stateProvince"
## [19] "occurrenceStatus" "individualCount"
## [21] "publishingOrgKey" "decimalLatitude"
## [23] "decimalLongitude" "coordinateUncertaintyInMeters"
## [25] "coordinatePrecision" "elevation"
## [27] "elevationAccuracy" "depth"
## [29] "depthAccuracy" "eventDate"
## [31] "day" "month"
## [33] "year" "taxonKey"
## [35] "speciesKey" "basisOfRecord"
## [37] "institutionCode" "collectionCode"
## [39] "catalogNumber" "recordNumber"
## [41] "identifiedBy" "dateIdentified"
## [43] "license" "rightsHolder"
## [45] "recordedBy" "typeStatus"
## [47] "establishmentMeans" "lastInterpreted"
## [49] "mediaType" "issue"
# Data cleaning
dataset <- dataset[,c(13,23,22)]
names(dataset)[2:3] <- c('lon','lat')
dataset <- subset(dataset, (!is.na(lon) | !is.na(lat)) & (lon!= 0 & lat != 0))
dataset <- subset(dataset[!duplicated(dataset[,2:3]),], lon < 1 & lon > - 3.2)
dataKhaya <- dataset
species <- dataKhaya[1,1]
# ---Or use Ghana map
plot(gha_data, col = 'light yellow', main = paste(species,"Presences in Ghana"))
points(dataset$lon, dataset$lat, col="orange",pch=20, cex=1)
We use the baseline environmental data to extract the environment conditions of the specie presence.
# We check the crs format of the covariates and the species coordinates
presence@crs
## Coordinate Reference System:
## Deprecated Proj.4 representation: +proj=longlat +datum=WGS84 +no_defs
## WKT2 2019 representation:
## GEOGCRS["WGS 84 (with axis order normalized for visualization)",
## DATUM["World Geodetic System 1984",
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["geodetic longitude (Lon)",east,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433,
## ID["EPSG",9122]]],
## AXIS["geodetic latitude (Lat)",north,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433,
## ID["EPSG",9122]]]]
The baseline data had the same CRS than the species coordinates (decimal coordinates). We do not need to transform the two formats.
We could extract the raster information into a database as following:
datasetC <- dataset
dataset <- dataset[,2:3]
coordinates(dataset) <- c('lon', 'lat')
proj4string(dataset) <- CRS("+proj=longlat +datum=WGS84 +no_defs +
ellps=WGS84 +towgs84=0,0,0")
# Extraction -----------------------------------------------------
habGha_data <- extract(presence, dataset)
habGha_data <- cbind.data.frame(dataset, as.data.frame(habGha_data))
summary(habGha_data)
## lon lat bio12 bio15
## Min. :-3.055 Min. :4.750 Min. : 978 Min. : 50.00
## 1st Qu.:-2.857 1st Qu.:5.479 1st Qu.:1244 1st Qu.: 60.75
## Median :-2.560 Median :6.366 Median :1438 Median : 65.00
## Mean :-2.220 Mean :6.240 Mean :1490 Mean : 72.43
## 3rd Qu.:-2.199 3rd Qu.:6.690 3rd Qu.:1666 3rd Qu.: 73.25
## Max. : 0.550 Max. :7.609 Max. :2006 Max. :118.00
## mi dm llds
## Min. : 63.00 Min. :1.000 Min. :1.000
## 1st Qu.: 76.00 1st Qu.:2.000 1st Qu.:2.000
## Median : 88.00 Median :3.000 Median :3.000
## Mean : 95.21 Mean :2.893 Mean :2.821
## 3rd Qu.:115.00 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :136.00 Max. :5.000 Max. :4.000
# Extract the center of each cell
myxy <- xyFromCell(presence[[1]], cell = 1:ncell(presence[[1]]))
# Get all values of the environmental variables
myvar <- getValues(presence)
# create a database with location and the values for environmental variables
preEnv <- na.omit(cbind.data.frame(myxy,myvar))
# presence 'absence' matrix
pa <-c(rep(1,nrow(habGha_data)),rep(0,nrow(preEnv)))
# model data
model_data <- rbind.data.frame(habGha_data[,3:7],preEnv[,3:7])
We use the maxent() function for the Maximum Entropy species distribution model from the dismo package. Once the model obtained, we predicted the suitability of the baseline environmental covariates locations. We use the following code:
# MaxEnt prediction -------------------------------------------
# Model for actual data
MSWD <- maxent(x=model_data, p=pa, path=paste(getwd(),"/MaxentResult", sep=""),
args=c("-P", "noautofeature", "nothreshold", "noproduct",
paste("maximumbackground=",nrow(preEnv), sep=""), "noaddsamplestobackground"))
## This is MaxEnt version 3.4.3
# visualisation
plot(MSWD)
response(MSWD)
# Prediction with MaxEnt Model based on extracted data -------------------
MSWD.pred <- predict(MSWD, presence, args="outputformat=raw", sep="",
progress="text")
##
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
##
plot(MSWD.pred, main = paste("Suitability of locations in 1975\n for",species))
points(dataset$lon, dataset$lat, col=rgb(0,0,139,255*0.4,maxColorValue = 255),pch=20, cex=1)
Prediction of suitability locations using projected environmental variables
We use the model created bellow to predict the suitability locations in 2055.
# Prediction using the projected data
MSWD.F <- predict(MSWD, future, args="outputformat=raw", sep="",
progress="text")
##
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
##
plot(MSWD.F, main = paste("Suitability of locations In 2025\n for",species))
points(dataset$lon, dataset$lat, col="darkblue",pch=20, cex=1)
Combine the two graphs
par(mfrow = c(1,2))
plot(MSWD.pred, main = "BaseLine")
plot(MSWD.F, main = "Projected 2055")