Answering how environmental factors such as altitude, rain and random factors (use, dept, farm) affect beetle community.
library(readxl) # lee excel
library(plyr)
library(tidyverse)# manipula datos eficientemente
library(openxlsx) # escribe excel
library(sp)
library(magrittr)
library(HMSC)
library(raster)
library(rasterVis)
library(sf) # new spatial package
library(mapview) # plotting maps in html
library(knitr)
library(colorspace)
Read excel files from GAICA and paste all in beetle_full
###################################
###
### Beetle
###
###################################
beetle_full <- as.data.frame(read_excel("C:/Users/diego.lizcano/Box Sync/CodigoR/Tablas_GCS/data/GAICA_Escarabajos_2013.xlsx", sheet = "Full"))
no in this case.
##### select one order
ordenes <- unique(beetle_full$Orden)
generos <- unique(beetle_full$Genero)
especies <- unique(beetle_full$Nombre_cientifico)
transect_fincas <- unique(beetle_full$Localidad_original)
beetle_full$sitio_habitat <- paste(beetle_full$Localidad_original,
beetle_full$Habitat, sep = "")
site_dept_loc <- beetle_full %>% dplyr::select(Longitud_decimal, Latitud_decimal, Uso, Departamento)%>% distinct() # igual a site_dept_loc<-
# beetle_full <- filter(beetle_full, ORDEN == ordenes[1])
site_dept_loc$id <- c(1:length(site_dept_loc[,2]))
sitios<- site_dept_loc$id
We need a table with sites in rows and species (presence absence) in columns. To have this we need some manipulation from the original tables in Darwin core format. For the beetle data set we use the trap, transect, finca as site.
# ave_sp_sitio <- beetle_meta %>% group_by(NOMBRE_CIENTIFICO) %>% count(SITIO)
beetle_sp_sitio2 <- dlply(beetle_full, .(Longitud_decimal,
Latitud_decimal,
Uso),
function(x) x$Nombre_cientifico)
# site_dept_loc<- beetle_full %>% dplyr::select(Localidad_original, Longitud_decimal, Latitud_decimal, Habitat, Departamento )%>% distinct() ##
site_hab<- beetle_full %>% dplyr::select(Latitud_decimal, Longitud_decimal, Localidad , Uso, Departamento)%>% distinct() ##
# Delete 116 to equalize 236 sites
site_hab <- site_hab[-116,]
beetle_sp_sitio3 <- matrix(NA, length(sitios),length(especies))
colnames(beetle_sp_sitio3) <- especies
rownames(beetle_sp_sitio3) <- sitios #sitios
for(i in 1:length(beetle_sp_sitio2)){
beetle_sp_sitio3[i,which(colnames(beetle_sp_sitio3) %in% unique(beetle_sp_sitio2[[i]]))] <- 1
############## Change NA by 0
ind <- which(is.na(beetle_sp_sitio3[i,]))
beetle_sp_sitio3[i,ind] <- 0
}
library(DT)
datatable(beetle_sp_sitio3, options = list(pageLength = 10))
# kable(beetle_sp_sitio3)
Get deptos, altitude and precipitation maps and extract altitude and precipitation for each site.
# Specify sheet with a number or name
# puntos_loc <- ddply(beetle_full, .(Longitud_decimal), function(x) mean(x$LOC))
# puntos_lat <- ddply(beetle_full, .(Latitud_decimal), function(x) mean(x$LAT))
puntos <- as.data.frame(site_dept_loc)
# colnames(puntos) <- c("SITIO", "lon", "lat")
coordinates(puntos) <- ~Longitud_decimal+Latitud_decimal
geo<- CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")
proj4string(puntos) <- geo
###Create dataset for land use silvopastoril ########################
colmap <- getData('GADM', country='COL', level=2) # get colombia map
Meta <- colmap[colmap$NAME_1 == "Meta",]
Santander <- colmap[colmap$NAME_1 == "Santander",]
Quindio <- colmap[colmap$NAME_1 == "QuindÃo",]
Atlantico <- colmap[colmap$NAME_1 == "Atlántico",]
# Cubarral <- colmap[Meta$NAME_2 == "San Luis de Cubarral",]
# SanMartin <- colmap[Meta$NAME_2 == "San MartÃn",]
# twopol <- Meta[Meta$NAME_2 %in% c("San MartÃn", "San Luis de Cubarral"),]
# Paste deptos
twopol <- colmap[colmap$NAME_1 %in% c("Meta", "Santander", "QuindÃo", "Atlántico" ),]
# Get altitude and precipitation from worldclim
altitud_Meta <- getData('SRTM', lon=mean(extent(Meta)[1:2]), lat=mean(extent(Meta)[3:4]))
altitud_Meta <- mask(altitud_Meta, Meta)
prec_Meta <- getData('worldclim', var='prec', res=0.5, lon=mean(extent(Meta)[1:2]), lat=mean(extent(Meta)[3:4]))
prec_Meta <- mask(prec_Meta[[6]], Meta)# precipitacion junio
altitud_Santander <- getData('SRTM', lon=mean(extent(Santander)[1:2]), lat=mean(extent(Santander)[3:4]))
altitud_Santander <- mask(altitud_Santander, Santander)
prec_Santander <- getData('worldclim', var='prec', res=0.5, lon=mean(extent(Santander)[1:2]), lat=mean(extent(Santander)[3:4]))
prec_Santander <- mask(prec_Santander[[6]], Santander)# precipitacion junio
altitud_Quindio <- getData('SRTM', lon=mean(extent(Quindio)[1:2]), lat=mean(extent(Quindio)[3:4]))
altitud_Quindio <- mask(altitud_Quindio, Quindio)
prec_Quindio <- getData('worldclim', var='prec', res=0.5, lon=mean(extent(Quindio)[1:2]), lat=mean(extent(Quindio)[3:4]))
prec_Quindio <- mask(prec_Quindio[[6]], Quindio)# precipitacion junio
altitud_Atlantico <- getData('SRTM', lon=min(extent(Atlantico)[1:2]), lat=min(extent(Atlantico)[3:4]))
altitud_Atlantico <- mask(altitud_Atlantico, Atlantico)
prec_Atlantico <- getData('worldclim', var='prec', res=0.5, lon=mean(extent(Atlantico)[1:2]), lat=mean(extent(Atlantico)[3:4]))
prec_Atlantico <- mask(prec_Atlantico[[6]], Atlantico)# precipitacion junio
prec_list <- list(prec_Atlantico, prec_Quindio, prec_Santander, prec_Meta)
alti_list <- list(altitud_Atlantico, altitud_Quindio, altitud_Santander, altitud_Meta)
# add arguments such as filename
# x$filename <- 'test.tif'
prec_all <- do.call(merge, prec_list)
alti_all <- do.call(merge, alti_list)
# # - crop an area by extent
# e = extent(raster(
# xmn = -75.000,
# xmx = -70,
# ymn = 1,
# ymx = 12.000
# ))
#
# prec_all = crop(prec_all, e)
# alti_all = crop(alti_all, e)
#
#
# plot(alti_all)
# plot(twopol, add=T)
# plot(puntos, add=T, col="red")
##Create an X matrix of covariates for each data point, land use as one of the covariates and bioclim variables.
# env<-stack(prec_Meta, altitud_Meta)
env1<-extract(alti_all, puntos) # altitude from SRTM
env2<-extract(prec_all, puntos) # precipitation
############### Xmat
#### numeric effect
Xmat<-data.frame(alti=env1, #site_dept_loc$Elevacion,
rain=env2)#,
# habitat=site_dept_loc$Habitat) #prec_Meta dry_altitud_Meta
rownames(Xmat)<-sitios
# rec_Atlantico, prec_Quindio, prec_Santander, prec_Meta
##### plot
par(mfrow=c(4,2))
plot(Atlantico, main="Elevation Atlantico")
plot(alti_list[[1]], add=T)
plot(puntos, add=T)
plot(Atlantico, main="Mean Rainfall Atlantico")
plot(prec_list[[1]], col=brewer.pal('RdBu', n=10),add=T)
plot(puntos, add=T)
plot(Quindio, main="Elevation Quindio")
plot(alti_list[[2]], add=T)
plot(puntos, add=T)
plot(Quindio, main="Mean Rainfall Quindio")
plot(prec_list[[2]], col=brewer.pal('RdBu', n=10), add=T)
plot(puntos, add=T)
plot(Santander, main="Elevation Santander")
plot(alti_list[[3]], add=T)
plot(puntos, add=T)
plot(Santander, main="Mean Rainfall Santander")
plot(prec_list[[3]], col=brewer.pal('RdBu', n=10), add=T)
plot(puntos, add=T)
plot(Meta, main="Elevation Meta")
plot(alti_list[[4]], add=T)
plot(puntos, add=T)
plot(Meta, main="Mean Rainfall Meta")
plot(prec_list[[4]], col=brewer.pal('RdBu', n=10), add=T)
plot(puntos, add=T)
### show table
datatable(Xmat, options = list(pageLength = 10),
caption = 'Table of environmental factors')
# kable(cbind(site_dept_loc ,Xmat), caption = "Table of environmental factors")
##random effect matrix
deptos <- site_dept_loc$Departamento
uso <- site_dept_loc$Uso
finca <- site_hab$Localidad
Rmat<-as.data.frame(cbind(deptos, uso, finca))
# kable(Rmat)
datatable(Rmat, options = list(pageLength = 10),
caption = 'Table of random effects')
Hierarchical Modelling of Species Communities (HMSC) is a model-based approach for analyzing community ecological data (Ovaskainen et a. 2017a). The obligatory data for HMSC-analyses includes a matrix of species occurrences or abundances and a matrix of environmental covariates. Optional data include information about species traits and phylogenetic relationships, and information about the spatiotemporal context of the sampling design. HMSC partitions variation in species occurrences to components that relate to environmental filtering, species interactions, and random processes. HMSC yields inference both at species and community levels.
The comity is modeled taking several factors in to account:
image Otzo
linked by the generalized linear model: \[ L_{ij}^{F}= \sum_{k} x_{ik} \beta_{ik} \]
where the term \(x\) denotes the environmental covariate \(k\) measured at site \(i\), multiplied by the regression parameter \(\beta\) which denotes the response of species \(j\) to covariate \(k\).
To answer how species respond to their environment (\(\beta\)), we modeled their distribution as a regression parameter of a multivariate normal distribution (probit type).
The model is parametized in a Bayesian framework using the package HMSC in the statistical software R.
More mathematical details in: http://onlinelibrary.wiley.com/doi/10.1111/ele.12757/full
##organize all data required for the model in a single object
data4HMSC<-as.HMSCdata(Y=beetle_sp_sitio3, X=Xmat,Random=Rmat, scaleX=T, scaleTr = T, interceptTr = FALSE)# Auto=XYmat)
test1<-hmsc(data4HMSC, family="probit", niter=11000,nburn=1000,thin=10)
## [1] "The priors for the latent variables should be OK for probit models but not necessarily for other models, be careful"
## iteration 2200
## iteration 4400
## iteration 6600
## iteration 8800
##########################################
Auto_mat <- cbind(as.factor(sitios),
as.data.frame(coordinates(puntos)))
##organize all data required for the model in a single object
data4HMSC_auto<-as.HMSCdata(Y=beetle_sp_sitio3,
X=Xmat,
Random=Rmat,
Auto=Auto_mat,
scaleX=T, scaleTr = T, interceptTr = F)# Auto=XYmat)
## [1] "names were added to each section of 'Auto'"
test2<-hmsc(data4HMSC_auto, family="probit", niter=11000,nburn=1000,thin=10)
## [1] "'paramAutoDist' and 'paramAutoWeight' were defined, if one was not NULL it was overwritten"
## [1] "The priors for the latent variables should be OK for probit models but not necessarily for other models, be careful"
## [1] "The priors for the autocorrelated latent variables should be OK for probit models but not necessarily for other models, be careful"
## [1] "The priors for the latent variables should be OK for probit models but not necessarily for other models, be careful"
## iteration 2200
## iteration 4400
## iteration 6600
## iteration 8800
The box-plot shows species-specific results showing altitude, rainfall and intercept weight.
#===============
### Plot results
#===============
#---------------------
## Plot results paramX
#---------------------
### Average
### Mixing object
mixing1<-as.mcmc(test1, parameters="paramX")
mixing2 <- as.mcmc(test2, parameters = "paramX")
mixing3 <- as.mcmc(test2, parameters = "paramLatent")
### Convert the mixing object to a matrix
# mixingDF2 <- as.data.frame(mixing[[2]])
### Draw boxplot for each parameters
# par(mar = c(7, 4, 4, 2))
# boxplot(mixingDF2, las = 2)
### Draw trace and density plots for all combination of paramters
# plot(mixing)
### Convert the mixing object to a matrix
mixingDF1<-as.data.frame(mixing1)
mixingDF2<-as.data.frame(mixing2)
mixingDF3<-as.data.frame(mixing3)
### Draw boxplot for each parameters
# par(mfrow=c(3,1))
par(mar=c(18,3,1,1))
boxplot(mixingDF2[,1:40],las=2, col=rgb(0.3,0.5,0.4,0.6))
abline(h=0,col="red")
par(mar=c(18,3,1,1))
boxplot(mixingDF2[,41:80],las=2, col=rgb(0.3,0.5,0.4,0.6))
abline(h=0,col="red")
# par(mar=c(15,3,1,1))
par(mar=c(18,3,1,1))
boxplot(mixingDF2[,81:120],las=2, col=rgb(0.3,0.5,0.4,0.6))
abline(h=0,col="red")
# par(mar=c(15,3,1,1))
par(mar=c(18,3,1,1))
boxplot(mixingDF2[,121:160],las=2, col=rgb(0.3,0.5,0.4,0.6))
abline(h=0,col="red")
# par(mar=c(15,3,1,1))
par(mar=c(18,3,1,1))
boxplot(mixingDF2[,161:200],las=2, col=rgb(0.3,0.5,0.4,0.6))
abline(h=0,col="red")
# par(mar=c(15,3,1,1))
par(mar=c(18,3,1,1))
boxplot(mixingDF2[,201:240],las=2, col=rgb(0.3,0.5,0.4,0.6))
abline(h=0,col="red")
# par(mar=c(15,3,1,1))
par(mar=c(18,3,1,1))
boxplot(mixingDF2[,242:264],las=2, col=rgb(0.3,0.5,0.4,0.6))
abline(h=0,col="red")
# dev.off()
# ### Draw beanplots
# library(beanplot)
# par(mar=c(7,4,4,2))
# beanplot(mixingDF,las=2)
Importance of each covariate for each species in the model. The bar-plot shows species-specific results with species ordered according to increasing prevalence, so that species 1 is the rarest one. The legend shows averages percent over species.
#-----------------------------------------------------------
### Plot random effect estimation through correlation matrix
#-----------------------------------------------------------
variationPart <- variPart(test2, c("Intercept", "alti",
"rain"))# , "uso"
par(mar=c(10,5,5,1))
#par(mar=c(3,3,5,1))
barplot(t(variationPart)[,1:20], las=2, cex.names=0.75, cex.axis=0.75,
col=rainbow_hcl(7),
legend.text=paste(colnames(variationPart)," ",
signif(100*colMeans(variationPart),2),"%",sep=""),
args.legend=list(y=1.2, xjust=1, horiz=T, bty="n",cex=0.75))
par(mar=c(10,5,5,1))
barplot(t(variationPart)[,21:40], las=2, cex.names=0.75, cex.axis=0.75,
col=rainbow_hcl(7),
legend.text=paste(colnames(variationPart)," ",
signif(100*colMeans(variationPart),2),"%",sep=""),
args.legend=list(y=1.2, xjust=1, horiz=T, bty="n",cex=0.75))
par(mar=c(10,5,5,1))
barplot(t(variationPart)[,41:60], las=2, cex.names=0.75, cex.axis=0.75,
col=rainbow_hcl(7),
legend.text=paste(colnames(variationPart)," ",
signif(100*colMeans(variationPart),2),"%",sep=""),
args.legend=list(y=1.2, xjust=1, horiz=T, bty="n",cex=0.75))
par(mar=c(10,5,5,1))
barplot(t(variationPart)[,61:88], las=2, cex.names=0.75, cex.axis=0.75,
col=rainbow_hcl(7),
legend.text=paste(colnames(variationPart)," ",
signif(100*colMeans(variationPart),2),"%",sep=""),
args.legend=list(y=1.2, xjust=1, horiz=T, bty="n",cex=0.75))
Species-to-species association matrices identify species pairs showing a positive (red) or negative (blue) association, shown only if association has either sign with at least 75% posterior probability.
### Plotting association networks
#============================================
### Plot random effect estimation through correlation matrix
corMat <- corRandomEff(test1,cor=FALSE)
### Sampling units level
#--------------------------------------------
### Isolate the values of interest
ltri <- lower.tri(apply(corMat[, , , 1], 1:2, quantile, probs = 0.025),
diag=TRUE)
### True values
truth <- as.vector(tcrossprod(test2$results$estimation$paramLatent[[1]])[ltri])
### Average
average <- as.vector(apply(corMat[, , , 1], 1:2, mean)[ltri])
### 95% confidence intervals
corMat.025 <- as.vector(apply(corMat[, , , 1], 1:2, quantile,
probs = 0.025)[ltri])
corMat.975 <- as.vector(apply(corMat[, , , 1], 1:2, quantile,
probs=0.975)[ltri])
CI <- cbind(corMat.025, corMat.975)
### Plot the results
### Mixing object
### Draw estimated correlation matrix
library(corrplot)
## Warning: package 'corrplot' was built under R version 3.4.2
## corrplot 0.84 loaded
corMat <- corRandomEff(test1, cor = TRUE)
corMat2<- corRandomEff(test2, cor=TRUE)
averageCor2<- apply(corMat2[["Auto"]],1:2,mean)
averageCor <- apply(corMat[, , , 1], 1:2, mean)
corrplot(averageCor2, method = "color",
col = colorRampPalette(c("blue", "white", "red"))(200))
predTrain <- predict(test2)
library(circlize)
## Warning: package 'circlize' was built under R version 3.4.1
corMat <- corRandomEff(test1, cor = TRUE)
averageCor <- apply(corMat[, , , 2], 1:2, mean)
colMat <- matrix(NA, nrow = nrow(averageCor),
ncol = ncol(averageCor))
colMat[which(averageCor > 0.4, arr.ind = TRUE)] <- "red"
colMat[which(averageCor < -0.4, arr.ind = TRUE)] <- "blue"
chordDiagram(averageCor, symmetric = TRUE,
grid.col = "gray",
col=colMat, annotationTrack = "grid",
annotationTrackHeight = c(0.03, 0.01),
preAllocateTracks = list(track.height = max(strwidth(unlist(dimnames(averageCor))))))
# we go back to the first track and customize sector labels
circos.track(track.index = 1, panel.fun = function(x, y) {
circos.text(CELL_META$xcenter, CELL_META$ylim[1], CELL_META$sector.index,
facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.5))
}, bg.border = NA) # here set bg.border to NA is important
Ymean1 <- apply(test1$data$Y, 2, mean)
R2_1 <- Rsquared(test1, averageSp=FALSE)
plot(Ymean1, R2_1, pch=19, ylim=c(0,0.9),
xlim=c(0,0.9))
Ymean2 <- apply(test2$data$Y, 2, mean)
R2_2 <- Rsquared(test2, averageSp=FALSE)
plot(Ymean2, R2_2, col="red" ,pch=18,
ylim=c(0,0.9),xlim=c(0,0.9), add=TRUE,
main=paste('Mean R2 value over species',
signif(mean(Ymean2),2), signif(mean(Ymean2),2)))
## Warning in plot.window(...): "add" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "add" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "add" is not a
## graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "add" is not a
## graphical parameter
## Warning in box(...): "add" is not a graphical parameter
## Warning in title(...): "add" is not a graphical parameter
# For a particular set of parameter, this given directly by the hmsc function
# For example for the beta (paramX)
# test1$results$estimation$paramX
# For the full joint probability distribution
fullPost <- jposterior(test2)
predTrain <- predict(test2)
# ## R SCRIPT
# ### Simulating "validation" data
# X <- matrix(nrow = 10, ncol = 3)
# colnames(X) <- colnames(alti$X)
# X[, 1] <- 1
# X[, 2] <- rnorm(10)
# X[, 3] <- rnorm(10)
# RandomSel <- sample(200, 10)
# Random <- alti$Random[RandomSel, ]
# for(i in 1:ncol(Random)){
# Random[, i] <- as.factor(as.character(Random[, i]))
# }
# colnames(Random) <- colnames(alti$Random)
# dataVal <- as.HMSCdata(X = X, Random = Random)
# ### Prediction for a new set of values
# predVal <- predict(test1, dataVal)
sessionInfo()
## R version 3.4.0 (2017-04-21)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 14393)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=Spanish_Colombia.1252 LC_CTYPE=Spanish_Colombia.1252
## [3] LC_MONETARY=Spanish_Colombia.1252 LC_NUMERIC=C
## [5] LC_TIME=Spanish_Colombia.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] circlize_0.4.1 corrplot_0.84
## [3] DT_0.2 colorspace_1.3-2
## [5] knitr_1.17 mapview_2.1.4
## [7] leaflet_1.1.0 sf_0.5-4
## [9] rasterVis_0.41 latticeExtra_0.6-28
## [11] RColorBrewer_1.1-2 lattice_0.20-35
## [13] raster_2.5-8 HMSC_2.0-10
## [15] RcppArmadillo_0.8.100.1.0 Rcpp_0.12.13
## [17] coda_0.19-1 MASS_7.3-47
## [19] magrittr_1.5 sp_1.2-5
## [21] openxlsx_4.0.17 dplyr_0.7.4
## [23] purrr_0.2.4 readr_1.1.1
## [25] tidyr_0.7.2 tibble_1.3.4
## [27] ggplot2_2.2.1 tidyverse_1.1.1
## [29] plyr_1.8.4 readxl_1.0.0
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-131 satellite_1.0.1 lubridate_1.6.0
## [4] webshot_0.4.2 httr_1.3.1 rprojroot_1.2
## [7] tools_3.4.0 backports_1.1.1 rgdal_1.2-13
## [10] R6_2.2.2 DBI_0.7 lazyeval_0.2.0
## [13] mnormt_1.5-5 compiler_3.4.0 rvest_0.3.2
## [16] xml2_1.1.1 scales_0.5.0 hexbin_1.27.1
## [19] psych_1.7.8 stringr_1.2.0 digest_0.6.12
## [22] foreign_0.8-67 rmarkdown_1.6 R.utils_2.5.0
## [25] base64enc_0.1-3 pkgconfig_2.0.1 htmltools_0.3.6
## [28] GlobalOptions_0.0.12 htmlwidgets_0.9 rlang_0.1.2
## [31] shiny_1.0.5 shape_1.4.3 bindr_0.1
## [34] zoo_1.8-0 jsonlite_1.5 crosstalk_1.0.0
## [37] R.oo_1.21.0 munsell_0.4.3 R.methodsS3_1.7.1
## [40] stringi_1.1.5 yaml_2.1.14 grid_3.4.0
## [43] parallel_3.4.0 forcats_0.2.0 udunits2_0.13
## [46] haven_1.1.0 hms_0.3 gdalUtils_2.0.1.7
## [49] reshape2_1.4.2 codetools_0.2-15 stats4_3.4.0
## [52] glue_1.1.1 evaluate_0.10.1 modelr_0.1.1
## [55] png_0.1-7 httpuv_1.3.5 foreach_1.4.3
## [58] cellranger_1.1.0 gtable_0.2.0 assertthat_0.2.0
## [61] mime_0.5 xtable_1.8-2 broom_0.4.2
## [64] viridisLite_0.2.0 iterators_1.0.8 units_0.4-6
## [67] bindrcpp_0.2