packages

# install if required
if(!require(tidyverse)){install.packages("tidyverse")}
if(!require(googlesheets4)){install.packages("googlesheets4")}
if(!require(GGally)){install.packages("GGally")}
if(!require(reshape2)){install.packages("reshape2")}
if(!require(lme4)){install.packages("lme4")}
if(!require(compiler)){install.packages("compiler")}
if(!require(parallel)){install.packages("parallel")}
if(!require(boot)){install.packages("boot")}
if(!require(lattice)){install.packages("lattice")}
if(!require(car)){install.packages("car")}
if(!require(ggpubr)){install.packages("ggpubr")}
if(!require(vegan)){install.packages("vegan")}
if(!require(ggsci)){install.packages("ggsci")}
if(!require(ggrepel)){install.packages("ggrepel")}
if(!require(ggforce)){install.packages("ggforce")}
if(!require(plotly)){install.packages("plotly")}
if(!require(rgl)){install.packages("rgl")}
if(!require(mgcv)){install.packages("mgcv")}
if(!require(factoextra)){install.packages("factoextra")}
if(!require(devtools)){install.packages("devtools")}
if(!require(gridExtra)){install.packages("gridExtra")}
# Load
library(tidyverse)
library(googlesheets4); gs4_deauth()
library(googledrive)
library(GGally)
library(reshape2)
library(lme4)
library(compiler)
library(parallel)
library(boot)
library(lattice)
library(car)
library(ggpubr)
library(vegan)
library(readxl)
library(ggsci)
library(ggrepel)
library(ggforce)
library(plotly)
library(rgl)
library(mgcv)
library(dplyr)
library(factoextra)
library(devtools)
library(gridExtra)

library(BiodiversityR) # also loads vegan
library(readxl)
library(ggsci)
library(ggrepel)
library(ggforce)

require(stats)

install_github("vqv/ggbiplot")
require(ggbiplot)

# Enable repository from gavinsimpson
options(repos = c(
  gavinsimpson = 'https://gavinsimpson.r-universe.dev',
  CRAN = 'https://cloud.r-project.org'))

Dataset

Base de datos. Google Sheet

ss= "https://docs.google.com/spreadsheets/d/1NKOlD_JM-rQaAAozckglz4csL85TdwYLNToGv3nHcjY/edit?usp=sharing"
hoja= "SppEnv"
rango="J2:FC115"
#
cr.sp <- read_sheet(ss,
                  sheet=hoja,
                  range=rango,
                  col_names = TRUE,
                  col_types = NULL,
                  na= "NA")
cr.sp <- as.data.frame(cr.sp)

spec.columns <- as.character(colnames(cr.sp))
names(cr.sp) <- spec.columns
dim(cr.sp)
## [1] 113 150
## EnV Data
ss= "https://docs.google.com/spreadsheets/d/1NKOlD_JM-rQaAAozckglz4csL85TdwYLNToGv3nHcjY/edit?usp=sharing"
hoja="SppEnv"
rango="C2:H115"
cr.env <- read_sheet(ss,
                  sheet=hoja,
                  range=rango,
                  col_names = TRUE,
                  col_types = NULL,
                  na= "NA")
cr.env <- as.data.frame(cr.env)

site.rows <- as.character(cr.env[,1])
cr.env <- cr.env[,2:6]
rownames(cr.env) <- site.rows

cr.env$UsoDSuelo <- as.factor(cr.env$UsoDSuelo)
str(cr.env)
## 'data.frame':    113 obs. of  5 variables:
##  $ cobdos   : num  65.3 65.3 51.7 46.7 55 ...
##  $ alt.arbol: num  9.57 9.57 8.1 9 11.3 7 8.2 9.69 11.6 11.7 ...
##  $ riqveg   : num  5 5 1 4 5 9 3 5 5 8 ...
##  $ num.arb  : num  27 27 5 8 15 51 74 13 7 40 ...
##  $ UsoDSuelo: Factor w/ 12 levels "Bf","Bp","Br",..: 1 1 1 1 1 1 1 1 1 1 ...
summary(cr.env)
##      cobdos         alt.arbol          riqveg          num.arb      
##  Min.   :  0.00   Min.   : 0.000   Min.   : 0.000   Min.   :  0.00  
##  1st Qu.: 21.88   1st Qu.: 6.640   1st Qu.: 3.000   1st Qu.:  7.00  
##  Median : 64.06   Median : 8.800   Median : 6.000   Median : 23.00  
##  Mean   : 56.74   Mean   : 8.623   Mean   : 6.969   Mean   : 29.21  
##  3rd Qu.: 87.66   3rd Qu.:11.510   3rd Qu.: 9.000   3rd Qu.: 46.25  
##  Max.   :100.00   Max.   :20.200   Max.   :27.000   Max.   :101.00  
##                                                                     
##    UsoDSuelo 
##  Bf     :10  
##  Br     :10  
##  Bsi    :10  
##  Cp     :10  
##  Pma    :10  
##  Pmb    :10  
##  (Other):53
# checking, rownames
check.datasets(cr.sp, cr.env)
## Warning: rownames for community and environmental datasets are different
rownames(cr.sp) <- rownames(cr.env)

## managing empty cells
cr.env <- cr.env[complete.cases(cr.env[,1:4]), ]

check.datasets(cr.sp, cr.env)
## OK
M1 <- cr.sp
M2 <- cr.env
##
#M2<- M2[rowSums(M2[,1:4])>0,]
check.datasets(M1,M2)
## OK

vegan

UN-CONSTRAINED

ord.model1 <- decorana(decostand(M1, method="hellinger"))
ord.model1 # CHequeamos el largo del gradiente ambiental al que las especies están respondiendo. Gradientes de 3-4 se consideran "grandes" y se considera buena decisión utilizar método de ordenación que asuma respuestas unimodales de las especies. Es decir, promedios ponderados (e.g., Análisis de correspondencia), y no sumas ponderadas (e.g., Componentes principales).
## 
## Call:
## decorana(veg = decostand(M1, method = "hellinger")) 
## 
## Detrended correspondence analysis with 26 segments.
## Rescaling of axes with 4 iterations.
## Total inertia (scaled Chi-square): 8.7011 
## 
##                        DCA1   DCA2   DCA3   DCA4
## Eigenvalues          0.3915 0.2481 0.2313 0.1965
## Additive Eigenvalues 0.3915 0.2420 0.2296 0.1962
## Decorana values      0.4330 0.2978 0.2227 0.2030
## Axis lengths         4.6300 3.8308 2.5308 3.1930

CONSTRAINED

MODEL Building [Vegan PCKG functions]

La hipótesis de trabajo es que es que la riqueza de vegetación, combinada con otra variable ambiental que representa la densidad de árboles, explican la distribución de las especies en el actual paisaje. Por lo tanto se necesita comprobar con cuáles variables ambientales, la riqueza de vegetación se puede combinar para considerarlas como los mejores predictores de la matriz de especies.

upr <- cca(M1 ~ cobdos + riqveg + num.arb + alt.arbol, 
                  data= M2)  # Full Model

lwr <- cca(M1 ~ 1, data=M2)   # Null model

vif.cca(upr) # V.I.F for the full model
##    cobdos    riqveg   num.arb alt.arbol 
##  2.544008  2.580015  2.190215  2.138332
# Stepwise (FORWARD) selection 
set.seed(123)
mods <- ordistep(lwr, scope = formula(upr), trace = 0)
mods$anova
##          Df    AIC      F Pr(>F)   
## + riqveg  1 673.33 3.2013  0.005 **
## + cobdos  1 673.81 1.4857  0.010 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Stepwise (BACKWARD) selection 
mods2 <- step(upr, 
              scope = list(lower = formula(lwr), upper = formula(upr)), 
              trace = 0, 
              test = "perm")
mods2$anova
##          Step Df Deviance Resid. Df Resid. Dev      AIC
## 1             NA       NA       108   40753.56 675.3339
## 2   - num.arb  1 437.4136       109   41190.97 674.5403
## 3 - alt.arbol  1 465.4762       110   41656.45 673.8101
## 4    - cobdos  1 562.6371       111   42219.09 673.3261

Utilizar el comando adecuado para ejecutar la selección de las variables que componen el mejor modelo explicativo, según Blanchet, Legendre & Borcard. 2008. Ecology 89, 2623-2623 (ver sesión del 25 de Oct.)

## Stepwise Selection via Adjusted R^2
### 1. Global test
### 2. Radjusted forward selection

finalModel <- __________(lwr, upr, trace = FALSE) 

finalModel

# Adjusted R^2
RsquareAdj(mods)
RsquareAdj(mods2)
RsquareAdj(finalModel)

# VIF 
vif.cca(finalModel) # estamos "OK"!

Permutest

# Utilizando tests de permutaciones para calcular significancias
# Modelo completo
upr$call
## cca(formula = M1 ~ cobdos + riqveg + num.arb + alt.arbol, data = M2)
pstat <- permustats(anova(upr))
summary(pstat)
## 
##       statistic    SES   mean lower median  upper Pr(perm)    
## Model    1.7776 7.9754 0.9953       0.9886 1.1628    0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Interval (Upper - Lower) = 0.95)
densityplot(pstat)

# Modelo con el que se decidió trabajar
finalModel$call
## cca(formula = M1 ~ riqveg + cobdos, data = M2)
pstat <- permustats(anova(finalModel))
summary(pstat)
## 
##       statistic     SES    mean lower  median   upper Pr(perm)    
## Model    2.3505 10.2622  0.9957        0.9797  1.2323    0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Interval (Upper - Lower) = 0.95)
densityplot(pstat)

### model significance
#anova(modelo, model = c("direct", "full", "reduced"),by = c("axis", "terms", "margin"))


### Testing by "Axis"
set.seed(1)
anova(finalModel, by="axis")
## Permutation test for cca under reduced model
## Forward tests for axes
## Permutation: free
## Number of permutations: 999
## 
## Model: cca(formula = M1 ~ riqveg + cobdos, data = M2)
##           Df ChiSquare      F Pr(>F)    
## CCA1       1    0.2810 3.3878  0.001 ***
## CCA2       1    0.1089 1.3132  0.047 *  
## Residual 110    9.1252                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
### Testing ny "Terms"
set.seed(5)
anova(finalModel,
      by = "terms")
## Permutation test for cca under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## Model: cca(formula = M1 ~ riqveg + cobdos, data = M2)
##           Df ChiSquare      F Pr(>F)    
## riqveg     1    0.2667 3.2153  0.001 ***
## cobdos     1    0.1233 1.4857  0.011 *  
## Residual 110    9.1252                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Este método es el menos sesgado.

Calcula la importancia de cada término, cuando este se remueve del modelo global, i.e., “Efecto Marginal” . (utilizar el argumento adecuado)

### Marginal effects (MORE USEFUL !) 
set.seed(10)
anova(finalModel, by = "_________")

Diagrama de ordenación

.long formats

plot2 <- ordiplot(finalModel, choices = c(1,2), hill = TRUE)

sites.long <- sites.long(plot2, env.data=M2)
species.long <- species.long(plot2)

axis.long <- axis.long(finalModel, choices = c(1,2))

env.fit para variables respuesta (Spp)

spec.envfit <- envfit(plot2, env=M1)
spec.data.envfit <- data.frame(r=spec.envfit$vectors$r, p=spec.envfit$vectors$pvals)
species.long2 <- species.long(plot2, spec.data=spec.data.envfit)
head(species.long2, n=20)
##              r     p       axis1       axis2 labels
## 1  0.032293827 0.128 -0.90757849  0.10364824      1
## 2  0.069526516 0.017  0.18138883  0.13475252      2
## 3  0.003597418 0.823 -0.49405560  0.37623479      3
## 4  0.006687715 0.641  0.01213478 -0.06934233      4
## 5  0.014625186 0.421  0.44751659  0.01774183      5
## 6  0.042612038 0.101 -0.66623732  0.86334608      6
## 7  0.011600890 0.485 -0.77606899 -0.84447654      7
## 8  0.009602421 0.448 -0.49512891  0.72307565      8
## 9  0.013723349 0.424  0.42966045 -0.02074001      9
## 10 0.001483311 0.870 -0.72758542 -0.64544286     10
## 11 0.018491019 0.345  0.03500078 -0.15318092     11
## 12 0.028507020 0.220 -2.26130742 -0.88195472     12
## 13 0.004352336 0.778  0.09162240  0.29007577     13
## 14 0.113732535 0.022 -2.08862912 -0.61151892     14
## 15 0.202765800 0.001 -2.63746042 -1.30642803     15
## 16 0.048874475 0.078 -0.54662864  0.17255316     16
## 17 0.008529457 0.544 -0.38401839  0.30920651     17
## 18 0.014093521 0.428  0.35104346 -0.03299250     18
## 19 0.004431857 0.740  0.70742714 -0.11452255     19
## 20 0.002282268 0.781  0.06258033  0.43422378     20

En este caso no utilizamos envfit() para las variables ambientales. Necesitamos obtener los ‘biplot scores’.

## Vector Fit
vectors<- data.frame(cbind(vector= rownames(data.frame(plot2$biplot)), axis1=data.frame(plot2$biplot)[,1] , axis2=data.frame(plot2$biplot)[,2] ) )
vectors$vector <- sapply(vectors[,1], as.factor)
vectors$axis1 <- as.numeric(vectors$axis1)
vectors$axis2 <- as.numeric(vectors$axis2)
vectors
##   vector      axis1      axis2
## 1 riqveg -0.9575183 -0.2883724
## 2 cobdos -0.8414543  0.5403283
#vectors.envfit <- envfit(plot2, env= M2[,c(3,14)])
#vectors.long3 <- vectorfit.long()
#vectors.long3

### Spec r >= 0%
species.long3 <- species.long2[species.long2$r >= 0, ]
head(species.long3, n=20)
##              r     p       axis1       axis2 labels
## 1  0.032293827 0.128 -0.90757849  0.10364824      1
## 2  0.069526516 0.017  0.18138883  0.13475252      2
## 3  0.003597418 0.823 -0.49405560  0.37623479      3
## 4  0.006687715 0.641  0.01213478 -0.06934233      4
## 5  0.014625186 0.421  0.44751659  0.01774183      5
## 6  0.042612038 0.101 -0.66623732  0.86334608      6
## 7  0.011600890 0.485 -0.77606899 -0.84447654      7
## 8  0.009602421 0.448 -0.49512891  0.72307565      8
## 9  0.013723349 0.424  0.42966045 -0.02074001      9
## 10 0.001483311 0.870 -0.72758542 -0.64544286     10
## 11 0.018491019 0.345  0.03500078 -0.15318092     11
## 12 0.028507020 0.220 -2.26130742 -0.88195472     12
## 13 0.004352336 0.778  0.09162240  0.29007577     13
## 14 0.113732535 0.022 -2.08862912 -0.61151892     14
## 15 0.202765800 0.001 -2.63746042 -1.30642803     15
## 16 0.048874475 0.078 -0.54662864  0.17255316     16
## 17 0.008529457 0.544 -0.38401839  0.30920651     17
## 18 0.014093521 0.428  0.35104346 -0.03299250     18
## 19 0.004431857 0.740  0.70742714 -0.11452255     19
## 20 0.002282268 0.781  0.06258033  0.43422378     20
### Var Ambientales r >= 0
#vectors.long3 <- vectors.long3[vectors.long3$r >= 0, ]
#vectors.long3

Ellipses

#plot2 <- ordiplot(ord.model1, choices=c(1,2))
#plot2
#usosuelo.ellipses <- ordiellipse(plot2, groups=M2$UsoDSuelo, display="sites", kind="se")
#usosuelo.ellipses.long <- ordiellipse.long(usosuelo.ellipses, grouping.name="UsoDeSuelo")

Centroids

#plot2 <- ordiplot(ord.model1, choices=c(1,2))
#plot2
#centroids.long1 <- centroids.long(sites.long, grouping=sites.long$UsoDSuelo, FUN = mean, centroids.only = TRUE)

ggplots

f = 1

plotgg2 <- ggplot() + 
    geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
    geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +
  xlab(axis.long[1, "label"]) +
    ylab(axis.long[2, "label"]) +  
    scale_x_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
    scale_y_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +    
    #geom_point(data=sites.long, 
     #          aes(x=axis1, y=axis2), 
      #         size=3, shape=1) +
  
  #geom_text_repel(data=sites.long,aes(x=axis1, y=axis2, label=labels),colour="black") +
  
  geom_point(data=species.long2,aes(x=axis1*f, y=axis2*f)) +
    #geom_segment(data=species.long3,aes(x=0, y=0, xend=axis1*f, yend=axis2*f),colour="red", linewidth=0.7, arrow=arrow(angle=20)) +
   geom_text_repel(data=species.long3,aes(x=axis1*f, y=axis2*f, label=labels),colour="red", max.overlaps = 10) +
  
   geom_segment(data=vectors,aes(x=0, y=0, xend=axis1*f*4, yend=axis2*f*4),colour="blue", linewidth=0.7, arrow=arrow(angle=20)) +
    geom_text_repel(data=vectors,aes(x=axis1*f*4, y= axis2*f*4, label=vector),colour="blue") +

  #geom_polygon(data=usosuelo.ellipses.long,
   #            aes(x=axis1, y=axis2, colour= usosuelo.ellipses.long$UsoDeSuelo,
    #               fill=after_scale(alpha(colour, 0.2))),
     #          linewidth=0.2, show.legend=TRUE) +
  #geom_point(data=centroids.long1,
   #          aes(x=axis1c,y=axis2c, col=centroids.long1$Centroid)) +
  
  #    ggsci::scale_colour_npg() +
    coord_fixed(ratio=1)

plotgg2
## Warning: ggrepel: 19 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Ordisurfs

plot2 <- ordiplot(finalModel, choices = c(1,2), display = c("sites", "bp"), scaling=2)
ordisurf(plot2, y=as.numeric(M1[,87]), add=TRUE, col="orange", main= "'87' response to environmental gradient") # respuesta de la spp "#87". Abundancia más alta esperada a valores intermedios de riqueza de vegetación y cobertura de dosel 

Realizar dos diagramas de especies que según este análisis, se encuentran a valores altos deriqueza de vegetación

plot2 <- ordiplot(finalModel, choices = c(1,2), display = c("sites", "bp"), scaling=2)
ordisurf(plot2, y=as.numeric(M1[,_____]), add=TRUE, col="orange", main= "'______' response to environmental gradient") 
plot2 <- ordiplot(finalModel, choices = c(1,2), display = c("sites", "bp"), scaling=2)
ordisurf(plot2, y=as.numeric(M1[,_____]), add=TRUE, col="orange", main= "'______' response to environmental gradient")