A Bayesian geostatistical approach to modelling global distributions of Lygodium microphyllum under projected climate warming

Authors:

John M. Humphreys (jmh09r@my.fsu.edu)
James B. Elsner
Thomas H. Jagger
Stephanie Pau

Portions of this study have been submitted for publication.
Please contact authors at email address above to cite.

Model Fitting

Model fitting and results available here: https://rpubs.com/JMHumphreys/LygPA020217

Data Pre-processing

date()

## [1] "Mon Mar 06 23:20:37 2017"

Options

library(knitr)
library(rgl)
opts_knit$set(verbose = FALSE)
knit_hooks$set(webgl = hook_webgl)

Load Packages:

suppressMessages(library(INLA))
suppressMessages(library(rgdal))
suppressMessages(library(reshape2))
suppressMessages(library(raster))
suppressMessages(library(fields))
suppressMessages(library(rasterVis))
suppressMessages(library(rgeos))
suppressMessages(library(Thermimage))
suppressMessages(library(maptools))
suppressMessages(library(mapproj))
suppressMessages(library(spdep))
suppressMessages(library(ggplot2))
suppressMessages(library(GISTools))
suppressMessages(library(lattice))
suppressMessages(library(gridExtra))
suppressMessages(library(dismo))
suppressMessages(library(dplyr))

Set Directory

setwd("D:/Lygodium/LygProject/LygProject")

Get Point Data

This dataset includes multiple species.

Lyg.df = read.csv("D:/Lygodium/GBIF_data/MDat/REvCoord/LygMicro011917.csv", 
                  header = TRUE, sep=",")

dim(Lyg.df)

## [1] 25016     5

head(Lyg.df)

##   X               Species    Lat     Long Source
## 1 1 Lygodium microphyllum -32.47 152.2200      0
## 2 2 Lygodium microphyllum -32.30 152.1800      0
## 3 3 Lygodium microphyllum -32.30 152.1833      0
## 4 4 Lygodium microphyllum -32.30 152.1833      0
## 5 5 Lygodium microphyllum -30.87 152.1500      0
## 6 6 Lygodium microphyllum -30.72 152.9200      0

Get Country Boundaries

Countries = map("world", 
                fill = TRUE, 
                plot = FALSE)


IDs = sapply(strsplit(Countries$names, ":"), function(x) x[1])

LL84 = "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"

CountriesP = map2SpatialPolygons(Countries, IDs = IDs,
                                proj4string = CRS(projection(LL84 )))

#Add a dataframe  
pid = sapply(slot(CountriesP, "polygons"), 
             function(x) slot(x, "ID"))

p.df = data.frame( ID=1:length(CountriesP), 
                   row.names = pid)

Countries = SpatialPolygonsDataFrame(CountriesP, p.df)


#Rasterized version  
Ras = raster(res = 0.9,
             crs = proj4string(Countries))

Domain.r = rasterize(Countries, Ras, 
                     field = 0, 
                     background = NA)

Geographic Thinning

r = raster(res = 0.042)

myLL = cbind(Lyg.df$Long, Lyg.df$Lat)
    
set.seed(1976)
DFSamp = as.data.frame(gridSample(myLL, r, n = 1))
names(DFSamp) = c("Long", "Lat")

Remove Observations Over the Ocean

LL = cbind(DFSamp$Long, DFSamp$Lat)
LygC = SpatialPointsDataFrame(LL, DFSamp)
proj4string(LygC) = LL84
 
N = length(LygC[,1])


###Remove Observations Over Oceans
LygC$ID = !is.na(over(LygC, Countries))[,1]
LygC = subset(LygC, ID == TRUE )

LygC@data = LygC@data[,1:3]

dim(LygC)

## [1] 1549    3

plot(Countries, col="tan", main = "Occurence Observations")
plot(LygC, cex=0.1, pch=19, col="red", add=TRUE)

Stochastic Partial Differential Equations (SPDE)

Construct a mesh for the global domain (“mesh1”)

The mesh is constructed to have a greater number of triangulation over terrestrial regions.

CountriesU = gBuffer(Countries, width = 1) #Smoothing edges

MaxEdge = 2.0

bdry = inla.sp2segment(CountriesU)

mesh2D = inla.mesh.2d(boundary = bdry, 
                     cutoff = 1, 
                     max.edge = MaxEdge,
                     min.angle = 21) 

MeshLocs = cbind(mesh2D$loc[,1], mesh2D$loc[,2]) #Locations of mesh nodes

#Converting to 3D coordinates
xyz = as.data.frame(                
          inla.mesh.map(MeshLocs, 
                        projection = "longlat", 
                        inverse = TRUE))

true.radius.of.earth = 6371
radius.of.earth = 1

mesh1 = inla.mesh.create(loc = xyz,
                         cutoff = 400/true.radius.of.earth, 
                         refine=list(max.edge = 3500/true.radius.of.earth, min.angle = 26))

mesh1$n

## [1] 1284

plot(mesh1, rgl = TRUE, main = " ")

You must enable Javascript to view this page properly.

Quadrature Points

A dense grid of points is created over terrestrial portions of the global domain. These will be combined with the mesh node locations below and used as quadrature.

Grid.pnts = rasterToPoints(Domain.r, sp = TRUE)
                                      
Grid.pnts@data = Grid.pnts@data %>%
                  mutate(Long = Grid.pnts@coords[,1],
                         Lat = Grid.pnts@coords[,2],
                         OBS = 0,
                         Species = "Quad") %>%
                  select(-layer)

Additional quadrature points are drawn from each of the mesh vertices. Having both quadrature points located at mesh vertices helps avoid numerical issues.

dd = as.data.frame(cbind(mesh1$loc[,1], 
                         mesh1$loc[,2],
                         mesh1$loc[,3]))

#Convert to 2D long-lat to combine with quad points from the "dense grid"
dd = as.data.frame(
          inla.mesh.map(dd, 
                        projection = "longlat", 
                        inverse = FALSE))

names(dd) = c("Long", "Lat")


O.pnts = SpatialPointsDataFrame(dd, dd)
proj4string(O.pnts) = proj4string(LygC)
O.pnts = spTransform(O.pnts, CRS(proj4string(LygC)))


O.pnts@data = O.pnts@data %>%
              mutate(Long = O.pnts@coords[,1],
                     Lat = O.pnts@coords[,2],
                     OBS = 0,
                     Species = "Mesh") 

Int.pnts = spRbind(Grid.pnts, O.pnts)

Southeastern United States

A dense point grid is also created over covering the southeastern U.S. These locations will be used for later prediction.

This area includes states along the southeastern coastal plain (Florida, North Carolina, South Carolina, Georgia, Alabama, Mississippi, and Louisiana) as well as Arkansas and Tennessee.

Get State Boundaries

ext = c(-91.29756, -77.76994,
         24.54233, 35.54766)

States = map("state", 
            fill = TRUE,
            xlim = ext[1:2],
            ylim = ext[3:4],
            plot = FALSE)

IDs = sapply(strsplit(States$names, ":"),
             function(x) x[1])

StatesP = map2SpatialPolygons(States, IDs = IDs,
                              proj4string = CRS(projection(LygC)))

Rasterize and Create Points

SEStates.r = raster(res = 0.75)
extent(SEStates.r) = extent(StatesP)

SEStates.r = rasterize(StatesP, 
                       SEStates.r, 
                       field = 0, 
                       background = NA)

SEUS.pnts = rasterToPoints(SEStates.r, sp = TRUE)

SEUS.pnts@data = SEUS.pnts@data %>%
                  mutate(Long = SEUS.pnts@coords[,1],
                         Lat = SEUS.pnts@coords[,2],
                         OBS = 0,
                         Species = "SEUS") %>%
                  select(-layer)

Quad.pnts = spRbind(Int.pnts, SEUS.pnts)

South East Asia and Oceania

A dense point grid is also created over covering the SE Asia and Oceania. These locations will be used for later prediction.

Get Country Boundaries

coords = matrix(c(85, 40, 
                  183, 40,
                  183, -50,
                  85, -50,
                  85, 40), 
                 ncol = 2, 
                 byrow = TRUE)


SEApoly = Polygon(coords)
SEApoly = SpatialPolygons(list(Polygons(list(SEApoly), ID = "a")), 
                      proj4string=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"))

SEAsia.sp = crop(Countries, SEApoly)

####Rasterize and Create Points       
SEAsia.r = raster(res = 0.4)
extent(SEAsia.r) = extent(SEAsia.sp)

SEAsia.r = rasterize(SEAsia.sp, 
                     SEAsia.r, 
                     field = 0, 
                     background = NA)

SEAsia.pnts = rasterToPoints(SEAsia.r, sp = TRUE)

SEAsia.pnts@data = SEAsia.pnts@data %>%
                      mutate(Long = SEAsia.pnts@coords[,1],
                             Lat = SEAsia.pnts@coords[,2],
                             OBS = 0,
                             Species = "SEAsia") %>%
                      select(-layer)

Quad.pnts = spRbind(Quad.pnts, SEAsia.pnts)

Mexico and Central America

These locations will be used for later prediction.

Get Country Boundaries

coords = matrix(c(-109, 24.5, 
                  -55, 24.5,
                  -55, -5,
                  -109, -5,
                  -109, 24.5), 
                 ncol = 2, 
                 byrow = TRUE)

MCpoly = Polygon(coords)
MCpoly = SpatialPolygons(list(Polygons(list(MCpoly), ID = "a")), 
                      proj4string=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"))

MC.sp = crop(Countries, MCpoly)

####Rasterize and Create Points       
MC.r = raster(res = 0.6)
extent(MC.r) = extent(MC.sp)

MC.r = rasterize(MC.sp, 
                 MC.r, 
                 field = 0, 
                 background = NA)

MC.pnts = rasterToPoints(MC.r, sp = TRUE)

MC.pnts@data = MC.pnts@data %>%
                      mutate(Long = MC.pnts@coords[,1],
                             Lat = MC.pnts@coords[,2],
                             OBS = 0,
                             Species = "MC") %>%
                      select(-layer)

Quad.pnts = spRbind(Quad.pnts, MC.pnts)

Combine Observation, Quadrature, and Prediction Points

LygC@data = LygC@data %>%
              mutate(OBS = 1,
                     Species = "Lmic") %>%
              select(-ID)

LygPA.df = rbind(LygC@data, Quad.pnts@data)

row.names(LygPA.df) = 1:nrow(LygPA.df)

LygPA = SpatialPointsDataFrame(cbind(LygPA.df$Long, 
                                     LygPA.df$Lat), 
                               LygPA.df)

proj4string(LygPA) = CRS(proj4string(LygC))

LygPA@data = LygPA@data %>%
              mutate(Long = as.numeric(LygPA@coords[,1]),
                     Lat = as.numeric(LygPA@coords[,2]))

Extract Potential Covariates

Climate data

Source: http://www.worldclim.org/

Bio = getData('worldclim',     #Current climate data (averages 1950 - 2000)
                var='bio',
                  res=2.5)


Bio70 = getData('CMIP5',       #Projected climate data, 8.5 RCP for the year 2070
                var='bio',
                    res=2.5, 
                    rcp=85, 
                    model='GS', 
                    year=70)

Population Density

Population Density
http://sedac.ciesin.columbia.edu/data/set/gpw-v3-population-density

Pop = raster("D:/Lygodium/gl_gpwv3_pdens_00_wrk_25/gldens00/glds00g/w001001.adf")

Compound Topographic Index (CTI)

Also known as a “wetness index”. Calculated as the natural log of the upstream contributing area divided by the tangent of the slope for each cell.

CTI = raster("D:/BulkData_dl/Global_LC/SRTMindx/cti.tif")

Extract Covaraites to Point Locations

Also scaling values.

LygPA@data = LygPA@data %>%
              mutate(mTcm = extract(Bio$bio6, 
                                   spTransform(LygPA, 
                                   CRS(proj4string(Bio$bio6))),
                                   method = "simple")/100,
                     mTcm70 = extract(Bio70$gs85bi706, 
                                   spTransform(LygPA, 
                                   CRS(proj4string(Bio70$gs85bi706))),
                                   method = "simple")/100,
                     Pwq = extract(Bio$bio16, 
                                   spTransform(LygPA, 
                                   CRS(proj4string(Bio$bio16))),
                                   method = "simple")/1000,
                     Pwq70 = extract(Bio70$gs85bi7016, 
                                   spTransform(LygPA, 
                                   CRS(proj4string(Bio70$gs85bi7016))),
                                   method = "simple")/1000,
                     Pop = extract(Pop, 
                                   spTransform(LygPA, 
                                   CRS(proj4string(Bio))),
                                   method = "simple")/1000,
                     CTI = extract(CTI, 
                                   spTransform(LygPA, 
                                   CRS(proj4string(CTI))), 
                                   method = "simple")/10)

Imputing Data

df1 = LygPA@data

df1$Species = as.factor(df1$Species)

NEW = as.data.frame(
        df1 %>%
        group_by(Species) %>%
        mutate_each(funs(replace(., which(is.na(.)), 
                                 mean(., na.rm=TRUE)))))[, 5:10]

apply(NEW, 2, range)

##       mTcm mTcm70   Pwq Pwq70      Pop       CTI
## [1,] -5.43  -5.08 0.000 0.000  0.00000 0.9435256
## [2,]  2.55   2.73 5.564 6.135 34.23955 2.5704475

LygPA@data = cbind(LygPA@data[,1:4], NEW)

Nearest Neighbor Distance

Distance to nearest plant occurrence.

suppressMessages(library(spatstat))
nProj = "+proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0
         +x_0=0 +y_0=0 +k=1 +units=km +nadgrids=@null +no_defs"

P.pp = spTransform(LygPA, nProj)

By = as.factor(LygPA$Species)
P.pp = as.ppp(P.pp)

NN = (nndist(P.pp, by=By, k = 1))

LygPA$nLmic = NN[,"Lmic"]

range(LygPA$nLmic)

## [1] 5.456968e-12 2.580752e+04

LygPA$NN = LygPA$nLmic/1000 #Scale
LygPA$NN = round(LygPA$NN, 2) #Rounding

Subset Prediction Stack

Now that covariates have been collected for all locations, the set of points to be used for prediction are separated from those to be used for initial model fitting.

SECP.pnts = subset(LygPA, Species == "SEUS") #Southeastern U.S. for prediction 

SEAsia.pnts = subset(LygPA, Species == "SEAsia") #SE Asia and Oceania for prediction

MC.pnts = subset(LygPA, Species == "MC") #Mexico and Central America  

World.pnts = subset(LygPA, Species != "SEUS" &
                           Species != "SEAsia" &
                           Species != "MC") #Global occurences and quadrature #26059

Remove Duplicates

Removing quadrature points that are redundant to locations of mesh nodes.

World.pnts$dups = duplicated(World.pnts@data[, c("Long","Lat")])

sum(World.pnts$dups) #Number of duplicates.

## [1] 1

LygPA = subset(World.pnts, dups == FALSE)

Assessing Covarites for Possible Colinearity Issues

Colldiag() is an implementation of the regression collinearity diagnostic procedures found in Belsley, Kuh, and Welsch (1980). These procedures examine the “conditioning” of the matrix of independent variables.

Colldiag() computes the condition indexes of the matrix. If the largest condition index (the condition number) is large (Belsley et al suggest 30 or higher), then there may be collinearity issues.

Colin.df = LygPA@data %>%
            select(mTcm, Pwq, NN, Pop, CTI)

suppressMessages(library(perturb))

CI = colldiag(Colin.df)

CI

## Condition
## Index    Variance Decomposition Proportions
##           intercept mTcm  Pwq   NN    Pop   CTI  
## 1   1.000 0.001     0.009 0.013 0.021 0.004 0.001
## 2   1.698 0.000     0.184 0.040 0.023 0.221 0.000
## 3   2.016 0.000     0.096 0.031 0.003 0.771 0.000
## 4   3.590 0.001     0.387 0.033 0.903 0.003 0.001
## 5   4.542 0.009     0.280 0.808 0.048 0.001 0.015
## 6  25.130 0.989     0.043 0.075 0.003 0.000 0.983

detach("package:perturb", unload=TRUE)

Session Info:

sessionInfo()

## R version 3.3.2 (2016-10-31)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 8.1 x64 (build 9600)
## 
## locale:
## [1] LC_COLLATE=English_United States.1252 
## [2] LC_CTYPE=English_United States.1252   
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] spatstat_1.47-0     rpart_4.1-10        nlme_3.1-128       
##  [4] dplyr_0.5.0         dismo_1.1-1         gridExtra_2.2.1    
##  [7] GISTools_0.7-4      MASS_7.3-45         ggplot2_2.2.0      
## [10] spdep_0.6-8         mapproj_1.2-4       maptools_0.8-40    
## [13] Thermimage_2.2.3    rgeos_0.3-21        rasterVis_0.41     
## [16] latticeExtra_0.6-28 RColorBrewer_1.1-2  lattice_0.20-34    
## [19] fields_8.4-1        maps_3.1.1          spam_1.4-0         
## [22] raster_2.5-8        reshape2_1.4.2      rgdal_1.2-4        
## [25] INLA_0.0-1482340637 Matrix_1.2-7.1      sp_1.2-3           
## [28] rgl_0.96.0          knitr_1.15.1       
## 
## loaded via a namespace (and not attached):
##  [1] jsonlite_1.1      viridisLite_0.1.3 splines_3.3.2    
##  [4] gtools_3.5.0      shiny_0.14.2      assertthat_0.1   
##  [7] LearnBayes_2.15   backports_1.0.4   digest_0.6.10    
## [10] polyclip_1.5-6    colorspace_1.3-2  htmltools_0.3.5  
## [13] httpuv_1.3.3      plyr_1.8.4        gmodels_2.16.2   
## [16] xtable_1.8-2      scales_0.4.1      gdata_2.17.0     
## [19] tensor_1.5        tibble_1.2        mgcv_1.8-15      
## [22] hexbin_1.27.1     lazyeval_0.2.0    magrittr_1.5     
## [25] mime_0.5          deldir_0.1-12     evaluate_0.10    
## [28] foreign_0.8-67    tools_3.3.2       stringr_1.1.0    
## [31] munsell_0.4.3     htmlwidgets_0.8   goftest_1.0-3    
## [34] rmarkdown_1.2     boot_1.3-18       gtable_0.2.0     
## [37] abind_1.4-5       DBI_0.5-1         markdown_0.7.7   
## [40] R6_2.2.0          zoo_1.7-13        rprojroot_1.1    
## [43] stringi_1.1.2     parallel_3.3.2    Rcpp_0.12.8      
## [46] coda_0.19-1