Hierarchical Modelling of Bird Community

Answering how environmental factors such as altitude, rain and random factors (use, dept, farm) affect the bird community.

Read packages

library(readxl) # lee excel
library(plyr)
library(tidyverse)# manipula datos eficientemente
library(openxlsx) # escribe excel
library(sp)
library(magrittr)
library(HMSC)
library(raster)
library(rasterVis)
library(sf) # new spatial package
library(mapview) # plotting maps in html
library(knitr)
library(colorspace)
library(DT)

Read data

Read excel files from GAICA and paste all in aves_full

###################################
###
### AVES
###
###################################

aves_atlantico <- as.data.frame(read_excel("C:/Users/diego.lizcano/Box Sync/CodigoR/Tablas_GCS/data/GAICA_Aves_2013.xlsx", sheet = "ATLANTICO"))

aves_quindio <- as.data.frame(read_excel("C:/Users/diego.lizcano/Box Sync/CodigoR/Tablas_GCS/data/GAICA_Aves_2013.xlsx", sheet = "QUINDIO"))

aves_santander <- as.data.frame(read_excel("C:/Users/diego.lizcano/Box Sync/CodigoR/Tablas_GCS/data/GAICA_Aves_2013.xlsx", sheet = "SANTANDER"))

aves_meta <- as.data.frame(read_excel("C:/Users/diego.lizcano/Box Sync/CodigoR/Tablas_GCS/data/GAICA_Aves_2013.xlsx", sheet = "META"))

aves_full <- rbind(aves_atlantico, aves_quindio, aves_santander, aves_meta)

Select one order

PASSERIFORMES in this case.

##### select one order
ordenes <- unique(aves_full$ORDEN)
# PASSERIFORMES in this case.
aves_full <- filter(aves_full, ORDEN == ordenes[7])

Data filtering

We ned a table with sites in rows and species (presence absence) in columns. To have this we need some manipulation from the original tables in Darwin core format.

Sites in bird data are point counts (107 sites).

# ave_sp_sitio <- aves_meta %>% group_by(NOMBRE_CIENTIFICO) %>% count(SITIO)
ave_sp_sitio2 <- dlply(aves_full, .(PUNTO_MUESTREO), function(x) x$NOMBRE_CIENTIFICO)

especies <- unique(aves_full$NOMBRE_CIENTIFICO)
sitios <- unique(aves_full$PUNTO_MUESTREO)

site_dept_loc<- aves_full %>% dplyr::select(PUNTO_MUESTREO, LOC, LAT, ALTURA, DEPARTAMENTO, SITIO)%>%  distinct()

ave_sp_sitio3 <- matrix(NA, length(sitios),length(especies))
colnames(ave_sp_sitio3) <- especies
rownames(ave_sp_sitio3) <- sitios

for(i in 1:length(ave_sp_sitio2)){
  ave_sp_sitio3[i,which(colnames(ave_sp_sitio3) %in% unique(ave_sp_sitio2[[i]]))] <- 1
  ############## Change NA by 0
  ind <- which(is.na(ave_sp_sitio3[i,]))
  ave_sp_sitio3[i,ind] <- 0
}

# kable(ave_sp_sitio3)
datatable(ave_sp_sitio3, options = list(pageLength = 10),
          caption = 'Table species by site')

Spatial procesing

Get deptos, altitude and precipitation maps and extract altitude and precipitation for each site.

# Specify sheet with a number or name
# puntos_loc <- ddply(aves_full, .(PUNTO_MUESTREO), function(x) mean(x$LOC))
# puntos_lat <- ddply(aves_full, .(PUNTO_MUESTREO), function(x) mean(x$LAT))

puntos_loc <- as.numeric(site_dept_loc$LOC)
puntos_lat <- as.numeric(site_dept_loc$LAT)
puntos <- cbind.data.frame(sitios, puntos_loc, puntos_lat)

colnames(puntos) <- c("SITIO", "lon", "lat")
coordinates(puntos) <- ~lon+lat
geo<- CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")
proj4string(puntos) <-  geo

###Create  dataset for land use silvopastoril ########################
colmap <- getData('GADM', country='COL', level=2) # get colombia map
Meta <- colmap[colmap$NAME_1 == "Meta",]
Santander <- colmap[colmap$NAME_1 == "Santander",]
Quindio <- colmap[colmap$NAME_1 == "Quindío",]
Atlantico <- colmap[colmap$NAME_1 == "Atlántico",]

# Cubarral <- colmap[Meta$NAME_2 == "San Luis de Cubarral",]
# SanMartin <- colmap[Meta$NAME_2 == "San Martín",]
# twopol <- Meta[Meta$NAME_2 %in% c("San Martín", "San Luis de Cubarral"),]

# Paste deptos
twopol <- colmap[colmap$NAME_1 %in% c("Meta", "Santander", "Quindío", "Atlántico" ),]

# Get altitude and precipitation from worldclim
altitud_Meta <- getData('SRTM', lon=mean(extent(Meta)[1:2]), lat=mean(extent(Meta)[3:4]))  
altitud_Meta <- mask(altitud_Meta, Meta)
prec_Meta <- getData('worldclim', var='prec', res=0.5, lon=mean(extent(Meta)[1:2]), lat=mean(extent(Meta)[3:4]))
prec_Meta <- mask(prec_Meta[[6]], Meta)# precipitacion junio

altitud_Santander <- getData('SRTM', lon=mean(extent(Santander)[1:2]), lat=mean(extent(Santander)[3:4]))  
altitud_Santander <- mask(altitud_Santander, Santander)
prec_Santander <- getData('worldclim', var='prec', res=0.5, lon=mean(extent(Santander)[1:2]), lat=mean(extent(Santander)[3:4]))
prec_Santander <- mask(prec_Santander[[6]], Santander)# precipitacion junio

altitud_Quindio <- getData('SRTM', lon=mean(extent(Quindio)[1:2]), lat=mean(extent(Quindio)[3:4]))  
altitud_Quindio <- mask(altitud_Quindio, Quindio)
prec_Quindio <- getData('worldclim', var='prec', res=0.5, lon=mean(extent(Quindio)[1:2]), lat=mean(extent(Quindio)[3:4]))
prec_Quindio <- mask(prec_Quindio[[6]], Quindio)# precipitacion junio

altitud_Atlantico <- getData('SRTM', lon=min(extent(Atlantico)[1:2]), lat=min(extent(Atlantico)[3:4]))  
altitud_Atlantico <- mask(altitud_Atlantico, Atlantico)
prec_Atlantico <- getData('worldclim', var='prec', res=0.5, lon=mean(extent(Atlantico)[1:2]), lat=mean(extent(Atlantico)[3:4]))
prec_Atlantico <- mask(prec_Atlantico[[6]], Atlantico)# precipitacion junio

prec_list <- list(prec_Atlantico, prec_Quindio, prec_Santander, prec_Meta)
alti_list <- list(altitud_Atlantico, altitud_Quindio, altitud_Santander, altitud_Meta)



# add arguments such as filename
# x$filename <- 'test.tif'
prec_all <- do.call(merge, prec_list)
alti_all <- do.call(merge, alti_list)

# # - crop an area by extent
# e = extent(raster(
#   xmn = -75.000,
#   xmx = -70,
#   ymn = 1,
#   ymx = 12.000
# ))
# 
# prec_all = crop(prec_all, e)
# alti_all = crop(alti_all, e)
#    
# 
# plot(alti_all)
# plot(twopol, add=T)
# plot(puntos, add=T, col="red")

##Create an X matrix of covariates for each data point, land use as one of the covariates and bioclim variables.

# env<-stack(prec_Meta, altitud_Meta)
env1<-extract(alti_all, puntos)
env2<-extract(prec_all, puntos)

Xmat<-data.frame(alti= site_dept_loc$ALTURA, #env1, 
                 precip=env2) #prec_Meta dry_altitud_Meta

rownames(Xmat)<-sitios

# rec_Atlantico, prec_Quindio, prec_Santander, prec_Meta

##### plot
par(mfrow=c(4,2))

plot(Atlantico, main="Elevation Atlantico")
plot(alti_list[[1]], add=T)
plot(puntos, add=T)

plot(Atlantico, main="Mean Rainfall Atlantico")
plot(prec_list[[1]], col=brewer.pal('RdBu', n=10),add=T)
plot(puntos, add=T)

plot(Quindio, main="Elevation Quindio")
plot(alti_list[[2]], add=T)
plot(puntos, add=T)

plot(Quindio, main="Mean Rainfall Quindio")
plot(prec_list[[2]], col=brewer.pal('RdBu', n=10), add=T)
plot(puntos, add=T)

plot(Santander, main="Elevation Santander")
plot(alti_list[[3]], add=T)
plot(puntos, add=T)

plot(Santander, main="Mean Rainfall Santander")
plot(prec_list[[3]], col=brewer.pal('RdBu', n=10), add=T)
plot(puntos, add=T)

plot(Meta, main="Elevation Meta")
plot(alti_list[[4]], add=T)
plot(puntos, add=T)

plot(Meta, main="Mean Rainfall Meta")
plot(prec_list[[4]], col=brewer.pal('RdBu', n=10), add=T)
plot(puntos, add=T)

Add deptos and use type as random factors

##random effect matrix

deptos <- site_dept_loc$DEPARTAMENTO

Rmat<- cbind.data.frame(deptos, sitios)
Auto_mat <- as.data.frame(puntos)

# uso <- site_dept_loc$Uso
# finca <- site_dept_loc$Uso




# kable(Rmat)
datatable(Rmat, options = list(pageLength = 10),
          caption = 'Table of random effects')
# kable(cbind(site_dept_loc ,Xmat), caption = "Table of environmental factors")

HMSC Modelling aproach

Hierarchical Modelling of Species Communities (HMSC) is a model-based approach for analyzing community ecological data (Ovaskainen et a. 2017a). The obligatory data for HMSC-analyses includes a matrix of species occurrences or abundances and a matrix of environmental covariates. Optional data include information about species traits and phylogenetic relationships, and information about the spatiotemporal context of the sampling design. HMSC partitions variation in species occurrences to components that relate to environmental filtering, species interactions, and random processes. HMSC yields inference both at species and community levels.

The comity is modeled taking several factors in to account:

image Otzo

image Otzo

linked by the generalized linear model: \[ L_{ij}^{F}= \sum_{k} x_{ik} \beta_{ik} \]

where the term \(x\) denotes the environmental covariate \(k\) measured at site \(i\), multiplied by the regression parameter \(\beta\) wich denotes the response of species \(j\) to covariate \(k\).

To answer how species respond to their environment (\(\beta\)), we modeled their distribution as a regression parameter of a multivariate normal distribution (probit type).

The model is parametized in a bayesian framework using the package HMSC in the statistical software R.

More mathematical details in: http://onlinelibrary.wiley.com/doi/10.1111/ele.12757/full

##organize all data required for the model in a single object
data4HMSC<-as.HMSCdata(Y=ave_sp_sitio3, 
                       X=Xmat,
                       Random=Rmat,
                       Auto=Auto_mat,
                       scaleX=T)
## [1] "names were added to each section of 'Auto'"
test1<-hmsc(data4HMSC, family="probit", 
            niter=2000, nburn=1000, thin=5,
            verbose = 100)
## [1] "'paramAutoDist' and 'paramAutoWeight' were defined, if one was not NULL it was overwritten"
## [1] "The priors for the latent variables should be OK for probit models but not necessarily for other models, be careful"
## [1] "The priors for the autocorrelated latent variables should be OK for probit models but not necessarily for other models, be careful"
## [1] "The priors for the latent variables should be OK for probit models but not necessarily for other models, be careful"
## iteration 100
## iteration 200
## iteration 300
## iteration 400
## iteration 500
## iteration 600
## iteration 700
## iteration 800
## iteration 900
## iteration 1000
## iteration 1100
## iteration 1200
## iteration 1300
## iteration 1400
## iteration 1500
## iteration 1600
## iteration 1700
## iteration 1800
## iteration 1900

Results

Marginal posterior distributions of the parameters.

#===============
### Plot results
#===============
#---------------------
## Plot results paramX
#---------------------
### Average

### Mixing object
mixing<-as.mcmc(test1,parameters="paramX")

### Draw trace and density plots for all combination of paramters
# plot(mixing)

### Convert the mixing object to a matrix
mixingDF<-as.data.frame(mixing)

### Draw boxplot for each parameters
# par(mfrow=c(3,1))

par(mar=c(18,3,1,1))
boxplot(mixingDF[,1:30],las=2, col=rgb(0.3,0.5,0.4,0.6))
abline(h=0,col="red")

par(mar=c(18,3,1,1))
boxplot(mixingDF[,31:60],las=2, col=rgb(0.3,0.5,0.4,0.6))
abline(h=0,col="red")

# par(mar=c(15,3,1,1))
par(mar=c(18,3,1,1))
boxplot(mixingDF[,61:102],las=2, col=rgb(0.3,0.5,0.4,0.6))
abline(h=0,col="red")

# dev.off()
# ### Draw beanplots
# library(beanplot)
# par(mar=c(7,4,4,2))
# beanplot(mixingDF,las=2)

variance partitioning for the model

Importance of each covariate for each species in the model.

#-----------------------------------------------------------
### Plot random effect estimation through correlation matrix
#-----------------------------------------------------------



variationPart <- variPart(test1, c("Intercept", "alti",
                                   "precip"))
par(mar=c(10,3,7,1))
#par(mar=c(3,3,5,1))
barplot(t(variationPart)[,1:10], las=2, cex.names=0.75, cex.axis=0.75,
        col=rainbow_hcl(5),
        legend.text=paste(colnames(variationPart)," ",
                          signif(100*colMeans(variationPart),2),"%",sep=""),
        args.legend=list(y=1.2, xjust=1, horiz=T, bty="n",cex=0.75))

par(mar=c(10,3,7,1))
barplot(t(variationPart)[,11:21], las=2, cex.names=0.75, cex.axis=0.75,
        col=rainbow_hcl(5),
        legend.text=paste(colnames(variationPart)," ",
                          signif(100*colMeans(variationPart),2),"%",sep=""),
        args.legend=list(y=1.2, xjust=1, horiz=T, bty="n",cex=0.75))

par(mar=c(10,3,7,1))
barplot(t(variationPart)[,22:34], las=2, cex.names=0.75, cex.axis=0.75,
        col=rainbow_hcl(5),
        legend.text=paste(colnames(variationPart)," ",
                          signif(100*colMeans(variationPart),2),"%",sep=""),
        args.legend=list(y=1.2, xjust=1, horiz=T, bty="n",cex=0.75))

Plotting species correlations and association networks

Species-to-species association matrices identify species pairs showing a positive (red) or negative (blue) association, shown only if association has either sign with at least 75% posterior probability.

### Plotting association networks
#============================================
### Plot random effect estimation through correlation matrix
corMat <- corRandomEff(test1,cor=FALSE)
### Sampling units level
#--------------------------------------------


### Mixing object

### Draw estimated correlation matrix
library(corrplot)
## Warning: package 'corrplot' was built under R version 3.4.2
## corrplot 0.84 loaded
corMat <- corRandomEff(test1, cor = TRUE)
# averageCor <- apply(corMat[, , , 1], 1:2, mean) # sitios
averageCor <- apply(corMat[["Random"]],1:2,mean)# promedio de los dos
corrplot(averageCor, method = "color",
         col = colorRampPalette(c("blue", "white", "red"))(100))

Species correlations by site level.

co-occurence plot

By averaging site and deptos

library(circlize)
## Warning: package 'circlize' was built under R version 3.4.1
corMat <- corRandomEff(test1, cor = TRUE)
# averageCor <- apply(corMat[, , , 1], 1:2, mean) 
# averageCor <- apply(corMat[["Random"]][,,,1],1:2,mean)#deptos 
averageCor <- apply(corMat[["Random"]],1:2,mean)# promedio de los dos
colMat <- matrix(NA, nrow = nrow(averageCor), 
                 ncol = ncol(averageCor))
colMat[which(averageCor > 0.4, arr.ind = TRUE)] <- "red"
colMat[which(averageCor < -0.4, arr.ind = TRUE)] <- "blue"


chordDiagram(averageCor, symmetric = TRUE, 
             grid.col = "gray", 
             col=colMat, annotationTrack = "grid",
             annotationTrackHeight = c(0.03, 0.01),
    preAllocateTracks = list(track.height = max(strwidth(unlist(dimnames(averageCor))))))
# we go back to the first track and customize sector labels
circos.track(track.index = 1, panel.fun = function(x, y) {
    circos.text(CELL_META$xcenter, CELL_META$ylim[1], CELL_META$sector.index, 
        facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.5))
}, bg.border = NA) # here set bg.border to NA is important

Ymean <- apply(test1$data$Y, 2, mean)
R2 <- Rsquared(test1, averageSp=FALSE)
plot(Ymean, R2, pch=19, main=paste('Mean R2 value over species',
signif(mean(Ymean),2)))

# For a particular set of parameter, this given directly by the hmsc function
# For example for the beta (paramX)
# test1$results$estimation$paramX
# For the full joint probability distribution
fullPost <- jposterior(test1)
predTrain <- predict(test1)



# ## R  SCRIPT
# ### Simulating "validation" data
# X <- matrix(nrow = 10, ncol = 3)
# colnames(X) <- colnames(alti$X)
# X[, 1] <- 1
# X[, 2] <- rnorm(10)
# X[, 3] <- rnorm(10)
# RandomSel <- sample(200, 10)
# Random <- alti$Random[RandomSel, ]
# for(i in 1:ncol(Random)){
# Random[, i] <- as.factor(as.character(Random[, i]))
# }
# colnames(Random) <- colnames(alti$Random)
# dataVal <- as.HMSCdata(X = X, Random = Random)
# ### Prediction for a new set of values
# predVal <- predict(test1, dataVal)

R Session info

sessionInfo()
## R version 3.4.0 (2017-04-21)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 14393)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=Spanish_Colombia.1252  LC_CTYPE=Spanish_Colombia.1252   
## [3] LC_MONETARY=Spanish_Colombia.1252 LC_NUMERIC=C                     
## [5] LC_TIME=Spanish_Colombia.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] circlize_0.4.1            corrplot_0.84            
##  [3] bindrcpp_0.2              DT_0.2                   
##  [5] colorspace_1.3-2          knitr_1.17               
##  [7] mapview_2.1.4             leaflet_1.1.0            
##  [9] sf_0.5-5                  rasterVis_0.41           
## [11] latticeExtra_0.6-28       RColorBrewer_1.1-2       
## [13] lattice_0.20-35           raster_2.5-8             
## [15] HMSC_2.0-10               RcppArmadillo_0.8.100.1.0
## [17] Rcpp_0.12.13              coda_0.19-1              
## [19] MASS_7.3-47               magrittr_1.5             
## [21] sp_1.2-5                  openxlsx_4.0.17          
## [23] dplyr_0.7.4               purrr_0.2.4              
## [25] readr_1.1.1               tidyr_0.7.2              
## [27] tibble_1.3.4              ggplot2_2.2.1            
## [29] tidyverse_1.1.1           plyr_1.8.4               
## [31] readxl_1.0.0             
## 
## loaded via a namespace (and not attached):
##  [1] nlme_3.1-131         satellite_1.0.1      lubridate_1.7.0     
##  [4] webshot_0.4.2        httr_1.3.1           rprojroot_1.2       
##  [7] tools_3.4.0          backports_1.1.1      rgdal_1.2-15        
## [10] R6_2.2.2             DBI_0.7              lazyeval_0.2.1      
## [13] mnormt_1.5-5         compiler_3.4.0       rvest_0.3.2         
## [16] xml2_1.1.1           scales_0.5.0         classInt_0.1-24     
## [19] hexbin_1.27.1        psych_1.7.8          stringr_1.2.0       
## [22] digest_0.6.12        foreign_0.8-67       rmarkdown_1.6       
## [25] R.utils_2.5.0        base64enc_0.1-3      pkgconfig_2.0.1     
## [28] htmltools_0.3.6      GlobalOptions_0.0.12 htmlwidgets_0.9     
## [31] rlang_0.1.2          shiny_1.0.5          shape_1.4.3         
## [34] bindr_0.1            zoo_1.8-0            jsonlite_1.5        
## [37] crosstalk_1.0.0      R.oo_1.21.0          munsell_0.4.3       
## [40] R.methodsS3_1.7.1    stringi_1.1.5        yaml_2.1.14         
## [43] grid_3.4.0           parallel_3.4.0       forcats_0.2.0       
## [46] udunits2_0.13        haven_1.1.0          hms_0.3             
## [49] gdalUtils_2.0.1.7    reshape2_1.4.2       codetools_0.2-15    
## [52] stats4_3.4.0         glue_1.2.0           evaluate_0.10.1     
## [55] modelr_0.1.1         png_0.1-7            httpuv_1.3.5        
## [58] foreach_1.4.3        cellranger_1.1.0     gtable_0.2.0        
## [61] assertthat_0.2.0     mime_0.5             xtable_1.8-2        
## [64] broom_0.4.2          e1071_1.6-8          class_7.3-14        
## [67] viridisLite_0.2.0    iterators_1.0.8      units_0.4-6