packages

# install if required
if(!require(tidyverse)){install.packages("tidyverse")}
if(!require(googlesheets4)){install.packages("googlesheets4")}
if(!require(GGally)){install.packages("GGally")}
if(!require(reshape2)){install.packages("reshape2")}
if(!require(lme4)){install.packages("lme4")}
if(!require(compiler)){install.packages("compiler")}
if(!require(parallel)){install.packages("parallel")}
if(!require(boot)){install.packages("boot")}
if(!require(lattice)){install.packages("lattice")}
if(!require(car)){install.packages("car")}
if(!require(ggpubr)){install.packages("ggpubr")}
if(!require(vegan)){install.packages("vegan")}
if(!require(ggsci)){install.packages("ggsci")}
if(!require(ggrepel)){install.packages("ggrepel")}
if(!require(ggforce)){install.packages("ggforce")}
if(!require(plotly)){install.packages("plotly")}
if(!require(rgl)){install.packages("rgl")}
if(!require(mgcv)){install.packages("mgcv")}
if(!require(factoextra)){install.packages("factoextra")}
if(!require(devtools)){install.packages("devtools")}
if(!require(gridExtra)){install.packages("gridExtra")}
if(!require(BiodiversityR)){install.packages("BiodiversityR")}
# Load
library(tidyverse)
library(googlesheets4); gs4_deauth()
library(googledrive)
library(GGally)
library(reshape2)
library(lme4)
library(compiler)
library(parallel)
library(boot)
library(lattice)
library(car)
library(ggpubr)
library(vegan)
library(readxl)
library(ggsci)
library(ggrepel)
library(ggforce)
library(plotly)
library(rgl)
library(mgcv)
library(dplyr)
library(factoextra)
library(devtools)
library(gridExtra)

library(BiodiversityR) # also loads vegan
library(readxl)
library(ggsci)
library(ggrepel)
library(ggforce)

require(stats)

install_github("vqv/ggbiplot")
require(ggbiplot)

Data

[Varespec-Varechem Datasets] ](https://www.researchgate.net/publication/227830523_Effects_of_reindeer_grazing_on_vegetation_in_dry_Pinus_sylvestris_forests)

data(varespec)
data(varechem)
M1 <- varespec
M2 <- varechem

vegan

MODEL Building [Vegan PCKG functions]

upr <- cca(M1~ ., data=M2) # Full Model
lwr <- cca(M1~ 1, data=M2) # Null model

vif.cca(upr) # V.I.F for the full model
##         N         P         K        Ca        Mg         S        Al        Fe 
##  1.981742  6.028515 12.009357  9.925801  9.810609 18.378794 21.192739  9.127762 
##        Mn        Zn        Mo  Baresoil  Humdepth        pH 
##  5.380432  7.739664  4.320346  2.253683  6.012537  7.389267
# Stepwise (FORWARD) selection 
set.seed(1)
mods <- ordistep(lwr, scope = formula(upr), trace = 0)
mods$anova
##      Df    AIC      F Pr(>F)   
## + Al  1 128.61 3.6749  0.005 **
## + P   1 127.91 2.5001  0.005 **
## + K   1 127.44 2.1688  0.050 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Stepwise (BACKWARD) selection 
mods2 <- step(upr, 
              scope = list(lower = formula(lwr), upper = formula(upr)), 
              trace = 0, 
              test = "perm")
mods2$anova
##   Step Df Deviance Resid. Df Resid. Dev      AIC
## 1      NA       NA         9   1551.483 130.0539
## 2 - Fe  1 115.2085        10   1666.692 129.7730
## 3 - Al  1 105.9602        11   1772.652 129.2523
## 4  - N  1 117.5382        12   1890.190 128.7931
## 5 - pH  1 140.4399        13   2030.630 128.5131
## 6 - Ca  1 141.2450        14   2171.875 128.1270
## 7 - Zn  1 165.2596        15   2337.135 127.8871

Este método es el más recomendado según Blanchet, Legendre & Borcard. 2008. Ecology 89, 2623-2623

Es un ‘Forward selection’ pero se detiene una vez el r^2 del modelo globas es sobrepasado por el modelo que se está poniendo a prueba.

## Stepwise Selection via Adjusted R^2
### 1. Global test
### 2. Radjusted forward selection

finalModel <- ordiR2step(lwr, upr, trace = FALSE) 

finalModel
## Call: cca(formula = M1 ~ Al + P + K, data = M2)
## 
##               Inertia Proportion Rank
## Total          2.0832     1.0000     
## Constrained    0.6441     0.3092    3
## Unconstrained  1.4391     0.6908   20
## Inertia is scaled Chi-square 
## 
## Eigenvalues for constrained axes:
##   CCA1   CCA2   CCA3 
## 0.3616 0.1700 0.1126 
## 
## Eigenvalues for unconstrained axes:
##    CA1    CA2    CA3    CA4    CA5    CA6    CA7    CA8 
## 0.3500 0.2201 0.1851 0.1551 0.1351 0.1003 0.0773 0.0537 
## (Showing 8 of 20 unconstrained eigenvalues)
# Adjusted R^2
RsquareAdj(upr)
## $r.squared
## [1] 0.6919576
## 
## $adj.r.squared
## [1] 0.216964
RsquareAdj(mods2)
## $r.squared
## [1] 0.5359688
## 
## $adj.r.squared
## [1] 0.2884275
RsquareAdj(finalModel)
## $r.squared
## [1] 0.309204
## 
## $adj.r.squared
## [1] 0.2063241
# VIF 
vif.cca(finalModel) # estamos "OK"!
##       Al        P        K 
## 1.011808 2.365413 2.378806

Permutest

# Utilizando tests de permutaciones para calcular significancias
# Modelo completo
upr$call
## cca(formula = M1 ~ N + P + K + Ca + Mg + S + Al + Fe + Mn + Zn + 
##     Mo + Baresoil + Humdepth + pH, data = M2)
pstat <- permustats(anova(upr))
summary(pstat)
## 
##       statistic    SES   mean lower median  upper Pr(perm)  
## Model    1.4441 2.2415 1.0083       0.9951 1.3496    0.027 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Interval (Upper - Lower) = 0.95)
densityplot(pstat)

# Modelo con el que se decidió trabajar
finalModel$call
## cca(formula = M1 ~ Al + P + K, data = M2)
pstat <- permustats(anova(finalModel))
summary(pstat)
## 
##       statistic    SES   mean lower median  upper Pr(perm)    
## Model    2.9840 6.8660 1.0276       0.9966 1.5327    0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Interval (Upper - Lower) = 0.95)
densityplot(pstat)

### model significance
#anova(modelo, model = c("direct", "full", "reduced"),by = c("axis", "terms", "margin"))


### Testing by "Axis"
set.seed(1)
anova(finalModel, by="axis")
## Permutation test for cca under reduced model
## Forward tests for axes
## Permutation: free
## Number of permutations: 999
## 
## Model: cca(formula = M1 ~ Al + P + K, data = M2)
##          Df ChiSquare      F Pr(>F)    
## CCA1      1   0.36156 5.0249  0.001 ***
## CCA2      1   0.16996 2.3621  0.034 *  
## CCA3      1   0.11262 1.5651  0.142    
## Residual 20   1.43906                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
### Testing ny "Terms"
set.seed(5)
anova(finalModel,
      by = "terms")
## Permutation test for cca under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## Model: cca(formula = M1 ~ Al + P + K, data = M2)
##          Df ChiSquare      F Pr(>F)    
## Al        1   0.29817 4.1440  0.001 ***
## P         1   0.18991 2.6393  0.005 ** 
## K         1   0.15605 2.1688  0.018 *  
## Residual 20   1.43906                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Este método para probar significancia de cada témrino, es el menos sesgado.

Calcula la importancia de cada término, cuando este se remueve del modelo global, i.e., “Efecto Marginal”

### Marginal effects (MORE USEFUL !) 
set.seed(10)
anova(finalModel, by = "margin")
## Permutation test for cca under reduced model
## Marginal effects of terms
## Permutation: free
## Number of permutations: 999
## 
## Model: cca(formula = M1 ~ Al + P + K, data = M2)
##          Df ChiSquare      F Pr(>F)    
## Al        1   0.31184 4.3340  0.001 ***
## P         1   0.16810 2.3362  0.016 *  
## K         1   0.15605 2.1688  0.029 *  
## Residual 20   1.43906                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## En este caso este coincide con el modelo que salió en método "Forward Selection".

Diagrama de ordenación

.long formats

plot2 <- ordiplot(finalModel, choices = c(1,2))

sites.long <- sites.long(plot2, env.data=M2)
species.long <- species.long(plot2)


axis.long <- axis.long(finalModel, choices = c(1,2))

env.fit

spec.envfit <- envfit(plot2, env=M1)
spec.data.envfit <- data.frame(r=spec.envfit$vectors$r, p=spec.envfit$vectors$pvals)
species.long2 <- species.long(plot2, spec.data=spec.data.envfit)
species.long2
##                    r     p        axis1        axis2   labels
## Callvulg 0.309470973 0.005  0.205375226  1.452924415 Callvulg
## Empenigr 0.020883799 0.792 -0.249241648  0.106172410 Empenigr
## Rhodtome 0.161976149 0.139 -0.832342062  0.695653346 Rhodtome
## Vaccmyrt 0.336826456 0.009 -1.049373322  0.142718687 Vaccmyrt
## Vaccviti 0.099445805 0.333 -0.213881718 -0.035220466 Vaccviti
## Pinusylv 0.086815579 0.415  0.043237733  0.088553636 Pinusylv
## Descflex 0.172584093 0.097 -1.294154852 -0.001709944 Descflex
## Betupube 0.050952272 0.668 -0.387689078  1.094088293 Betupube
## Vacculig 0.090790443 0.414  0.834216328  0.342176685 Vacculig
## Diphcomp 0.042980512 0.708  0.106526408  0.050445777 Diphcomp
## Dicrsp   0.278455934 0.023 -0.653071689 -1.082857853   Dicrsp
## Dicrfusc 0.403616466 0.002 -0.590504122  0.768622911 Dicrfusc
## Dicrpoly 0.065135135 0.544 -0.316789772  0.039537608 Dicrpoly
## Hylosple 0.370651832 0.005 -1.604194408 -0.829912243 Hylosple
## Pleuschr 0.794824545 0.001 -0.865566275 -0.222385837 Pleuschr
## Polypili 0.046393542 0.614  0.137924211 -0.140649151 Polypili
## Polyjuni 0.102783555 0.321 -0.648617001 -0.244482860 Polyjuni
## Polycomm 0.121840371 0.242 -0.653220113  0.465016595 Polycomm
## Pohlnuta 0.124451474 0.258 -0.163687044 -0.227288195 Pohlnuta
## Ptilcili 0.041927921 0.727 -0.341777868  0.809298199 Ptilcili
## Barbhatc 0.039443624 0.709 -0.320290020  1.185539057 Barbhatc
## Cladarbu 0.503056082 0.002  0.398751456  0.348305429 Cladarbu
## Cladrang 0.533301416 0.001  0.520351217  0.168310020 Cladrang
## Cladstel 0.696293145 0.001  0.629241794 -0.391048460 Cladstel
## Cladunci 0.054001544 0.602 -0.194332052  0.015882609 Cladunci
## Cladcocc 0.226273913 0.064  0.097060652  0.125350164 Cladcocc
## Cladcorn 0.022377554 0.812 -0.295288599 -0.162905785 Cladcorn
## Cladgrac 0.024851103 0.782 -0.029672398  0.029000510 Cladgrac
## Cladfimb 0.150873073 0.161 -0.032158802  0.218355677 Cladfimb
## Cladcris 0.093106616 0.371 -0.263609738  0.280348213 Cladcris
## Cladchlo 0.112892636 0.280  0.168974903 -0.235192210 Cladchlo
## Cladbotr 0.080607058 0.459 -0.415199080  0.808820260 Cladbotr
## Cladamau 0.073632045 0.500  0.085268456  0.152263323 Cladamau
## Cladsp   0.073513982 0.499  0.475149446 -0.277959638   Cladsp
## Cetreric 0.002849095 0.974  0.107652765 -0.273134098 Cetreric
## Cetrisla 0.027469830 0.793 -0.000030198  0.095129073 Cetrisla
## Flavniva 0.131592235 0.165  1.478049964 -1.429699358 Flavniva
## Nepharct 0.087080372 0.495 -0.763014515 -0.440704064 Nepharct
## Stersp   0.107864754 0.306  0.173393520  0.294436133   Stersp
## Peltapht 0.015138538 0.848 -0.555462441 -0.413914281 Peltapht
## Icmaeric 0.194617460 0.098  0.214261881  0.803093839 Icmaeric
## Cladcerv 0.156222022 0.170  0.997509729 -1.061630172 Cladcerv
## Claddefo 0.154053541 0.179 -0.364907953  0.145760543 Claddefo
## Cladphyl 0.199125494 0.098  0.297272860 -0.817884873 Cladphyl

En este caso no utilizamos envfit(). Necesitamos la obtención de los ‘biplot scores’.

## Vector Fit
vectors<- data.frame(cbind(vector= rownames(data.frame(plot2$biplot)), axis1=data.frame(plot2$biplot)[,1] , axis2=data.frame(plot2$biplot)[,2] ) )
vectors$vector <- sapply(vectors[,1], as.factor)
vectors$axis1 <- as.numeric(vectors$axis1)
vectors$axis2 <- as.numeric(vectors$axis2)
vectors
##   vector      axis1      axis2
## 1     Al  0.8599208 -0.1603975
## 2      P -0.4195228 -0.7513381
## 3      K -0.4404434 -0.1646720
#vectors.envfit <- envfit(plot2, env= M2[,c(3,14)])
#vectors.long3 <- vectorfit.long()
#vectors.long3

### Spec r >= 0%
species.long3 <- species.long2[species.long2$r >= 0, ]
species.long3
##                    r     p        axis1        axis2   labels
## Callvulg 0.309470973 0.005  0.205375226  1.452924415 Callvulg
## Empenigr 0.020883799 0.792 -0.249241648  0.106172410 Empenigr
## Rhodtome 0.161976149 0.139 -0.832342062  0.695653346 Rhodtome
## Vaccmyrt 0.336826456 0.009 -1.049373322  0.142718687 Vaccmyrt
## Vaccviti 0.099445805 0.333 -0.213881718 -0.035220466 Vaccviti
## Pinusylv 0.086815579 0.415  0.043237733  0.088553636 Pinusylv
## Descflex 0.172584093 0.097 -1.294154852 -0.001709944 Descflex
## Betupube 0.050952272 0.668 -0.387689078  1.094088293 Betupube
## Vacculig 0.090790443 0.414  0.834216328  0.342176685 Vacculig
## Diphcomp 0.042980512 0.708  0.106526408  0.050445777 Diphcomp
## Dicrsp   0.278455934 0.023 -0.653071689 -1.082857853   Dicrsp
## Dicrfusc 0.403616466 0.002 -0.590504122  0.768622911 Dicrfusc
## Dicrpoly 0.065135135 0.544 -0.316789772  0.039537608 Dicrpoly
## Hylosple 0.370651832 0.005 -1.604194408 -0.829912243 Hylosple
## Pleuschr 0.794824545 0.001 -0.865566275 -0.222385837 Pleuschr
## Polypili 0.046393542 0.614  0.137924211 -0.140649151 Polypili
## Polyjuni 0.102783555 0.321 -0.648617001 -0.244482860 Polyjuni
## Polycomm 0.121840371 0.242 -0.653220113  0.465016595 Polycomm
## Pohlnuta 0.124451474 0.258 -0.163687044 -0.227288195 Pohlnuta
## Ptilcili 0.041927921 0.727 -0.341777868  0.809298199 Ptilcili
## Barbhatc 0.039443624 0.709 -0.320290020  1.185539057 Barbhatc
## Cladarbu 0.503056082 0.002  0.398751456  0.348305429 Cladarbu
## Cladrang 0.533301416 0.001  0.520351217  0.168310020 Cladrang
## Cladstel 0.696293145 0.001  0.629241794 -0.391048460 Cladstel
## Cladunci 0.054001544 0.602 -0.194332052  0.015882609 Cladunci
## Cladcocc 0.226273913 0.064  0.097060652  0.125350164 Cladcocc
## Cladcorn 0.022377554 0.812 -0.295288599 -0.162905785 Cladcorn
## Cladgrac 0.024851103 0.782 -0.029672398  0.029000510 Cladgrac
## Cladfimb 0.150873073 0.161 -0.032158802  0.218355677 Cladfimb
## Cladcris 0.093106616 0.371 -0.263609738  0.280348213 Cladcris
## Cladchlo 0.112892636 0.280  0.168974903 -0.235192210 Cladchlo
## Cladbotr 0.080607058 0.459 -0.415199080  0.808820260 Cladbotr
## Cladamau 0.073632045 0.500  0.085268456  0.152263323 Cladamau
## Cladsp   0.073513982 0.499  0.475149446 -0.277959638   Cladsp
## Cetreric 0.002849095 0.974  0.107652765 -0.273134098 Cetreric
## Cetrisla 0.027469830 0.793 -0.000030198  0.095129073 Cetrisla
## Flavniva 0.131592235 0.165  1.478049964 -1.429699358 Flavniva
## Nepharct 0.087080372 0.495 -0.763014515 -0.440704064 Nepharct
## Stersp   0.107864754 0.306  0.173393520  0.294436133   Stersp
## Peltapht 0.015138538 0.848 -0.555462441 -0.413914281 Peltapht
## Icmaeric 0.194617460 0.098  0.214261881  0.803093839 Icmaeric
## Cladcerv 0.156222022 0.170  0.997509729 -1.061630172 Cladcerv
## Claddefo 0.154053541 0.179 -0.364907953  0.145760543 Claddefo
## Cladphyl 0.199125494 0.098  0.297272860 -0.817884873 Cladphyl
### Var Ambientales r >= 0
#vectors.long3 <- vectors.long3[vectors.long3$r >= 0, ]
#vectors.long3

ggplots

f = 1

plotgg2 <- ggplot() + 
    geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
    geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +
  xlab(axis.long[1, "label"]) +
    ylab(axis.long[2, "label"]) +  
    scale_x_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
    scale_y_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +    
    geom_point(data=sites.long, 
               aes(x=axis1, y=axis2), 
               size=3, shape=1) +
  
  geom_text_repel(data=sites.long,aes(x=axis1, y=axis2, label=labels),colour="black") +
  
   geom_point(data=species.long2,aes(x=axis1*f, y=axis2*f)) +
    #geom_segment(data=species.long3,aes(x=0, y=0, xend=axis1*f, yend=axis2*f),colour="red", linewidth=0.7, arrow=arrow(angle=20)) +
    geom_text_repel(data=species.long3,aes(x=axis1*f, y=axis2*f, label=labels),colour="red") +
  
   geom_segment(data=vectors,aes(x=0, y=0, xend=axis1*f*3, yend=axis2*f*3),colour="blue", linewidth=0.7, arrow=arrow(angle=20)) +
    geom_text_repel(data=vectors,aes(x=axis1*f*3, y= axis2*f*3, label=vector),colour="blue") +

  
    ggsci::scale_colour_npg() +
    coord_fixed(ratio=1)

plotgg2
## Warning: ggrepel: 12 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Ordisurfs

plot2 <- ordiplot(finalModel, choices = c(1,2), display = c("sites", "bp"), scaling=2)
ordisurf(plot2, y=as.numeric(M1$Hylosple), add=TRUE, col="orange", main= "'Hylosple' response to environmental gradient") # "Hylosple" species response

plot2 <- ordiplot(finalModel, choices = c(1,2), display = c("sites", "bp"), scaling=2)
ordisurf(plot2, y=as.numeric(M1$Ptilcili), add=TRUE, col="orange", main= "'Ptilcili' response to environmental gradient") # "Ptilcili" species response

plot2 <- ordiplot(finalModel, choices = c(1,2), display = c("sites", "bp"), scaling=2)
ordisurf(plot2, y=as.numeric(M1$Cladphyl), add=TRUE, col="orange", main= "'Cladphyl' response to environmental gradient") # "Cladphyl" species response

plot2 <- ordiplot(finalModel, choices = c(1,2), display = c("sites", "bp"), scaling=2)
ordisurf(plot2, y=as.numeric(M1$Nepharct), add=TRUE, col="orange", main= "'Nepharct' response to environmental gradient") # "Nepharct" species response