packages
# install if required
if(!require(tidyverse)){install.packages("tidyverse")}
if(!require(googlesheets4)){install.packages("googlesheets4")}
if(!require(GGally)){install.packages("GGally")}
if(!require(reshape2)){install.packages("reshape2")}
if(!require(lme4)){install.packages("lme4")}
if(!require(compiler)){install.packages("compiler")}
if(!require(parallel)){install.packages("parallel")}
if(!require(boot)){install.packages("boot")}
if(!require(lattice)){install.packages("lattice")}
if(!require(car)){install.packages("car")}
if(!require(ggpubr)){install.packages("ggpubr")}
if(!require(vegan)){install.packages("vegan")}
if(!require(ggsci)){install.packages("ggsci")}
if(!require(ggrepel)){install.packages("ggrepel")}
if(!require(ggforce)){install.packages("ggforce")}
if(!require(plotly)){install.packages("plotly")}
if(!require(rgl)){install.packages("rgl")}
if(!require(mgcv)){install.packages("mgcv")}
if(!require(factoextra)){install.packages("factoextra")}
if(!require(devtools)){install.packages("devtools")}
if(!require(gridExtra)){install.packages("gridExtra")}
# Load
library(tidyverse)
library(googlesheets4); gs4_deauth()
library(googledrive)
library(GGally)
library(reshape2)
library(lme4)
library(compiler)
library(parallel)
library(boot)
library(lattice)
library(car)
library(ggpubr)
library(vegan)
library(readxl)
library(ggsci)
library(ggrepel)
library(ggforce)
library(plotly)
library(rgl)
library(mgcv)
library(dplyr)
library(factoextra)
library(devtools)
library(gridExtra)
library(BiodiversityR) # also loads vegan
library(readxl)
library(ggsci)
library(ggrepel)
library(ggforce)
require(stats)
install_github("vqv/ggbiplot")
require(ggbiplot)
# Enable repository from gavinsimpson
options(repos = c(
gavinsimpson = 'https://gavinsimpson.r-universe.dev',
CRAN = 'https://cloud.r-project.org'))
Base de datos. Google Sheet
ss= "https://docs.google.com/spreadsheets/d/1NKOlD_JM-rQaAAozckglz4csL85TdwYLNToGv3nHcjY/edit?usp=sharing"
hoja= "SppEnv"
rango="J2:FC115"
#
cr.sp <- read_sheet(ss,
sheet=hoja,
range=rango,
col_names = TRUE,
col_types = NULL,
na= "NA")
cr.sp <- as.data.frame(cr.sp)
spec.columns <- as.character(colnames(cr.sp))
names(cr.sp) <- spec.columns
dim(cr.sp)
## [1] 113 150
## EnV Data
ss= "https://docs.google.com/spreadsheets/d/1NKOlD_JM-rQaAAozckglz4csL85TdwYLNToGv3nHcjY/edit?usp=sharing"
hoja="SppEnv"
rango="C2:H115"
cr.env <- read_sheet(ss,
sheet=hoja,
range=rango,
col_names = TRUE,
col_types = NULL,
na= "NA")
cr.env <- as.data.frame(cr.env)
site.rows <- as.character(cr.env[,1])
cr.env <- cr.env[,2:6]
rownames(cr.env) <- site.rows
cr.env$UsoDSuelo <- as.factor(cr.env$UsoDSuelo)
str(cr.env)
## 'data.frame': 113 obs. of 5 variables:
## $ cobdos : num 65.3 65.3 51.7 46.7 55 ...
## $ alt.arbol: num 9.57 9.57 8.1 9 11.3 7 8.2 9.69 11.6 11.7 ...
## $ riqveg : num 5 5 1 4 5 9 3 5 5 8 ...
## $ num.arb : num 27 27 5 8 15 51 74 13 7 40 ...
## $ UsoDSuelo: Factor w/ 12 levels "Bf","Bp","Br",..: 1 1 1 1 1 1 1 1 1 1 ...
summary(cr.env)
## cobdos alt.arbol riqveg num.arb
## Min. : 0.00 Min. : 0.000 Min. : 0.000 Min. : 0.00
## 1st Qu.: 21.88 1st Qu.: 6.640 1st Qu.: 3.000 1st Qu.: 7.00
## Median : 64.06 Median : 8.800 Median : 6.000 Median : 23.00
## Mean : 56.74 Mean : 8.623 Mean : 6.969 Mean : 29.21
## 3rd Qu.: 87.66 3rd Qu.:11.510 3rd Qu.: 9.000 3rd Qu.: 46.25
## Max. :100.00 Max. :20.200 Max. :27.000 Max. :101.00
##
## UsoDSuelo
## Bf :10
## Br :10
## Bsi :10
## Cp :10
## Pma :10
## Pmb :10
## (Other):53
# checking, rownames
check.datasets(cr.sp, cr.env)
## Warning: rownames for community and environmental datasets are different
rownames(cr.sp) <- rownames(cr.env)
## managing empty cells
cr.env <- cr.env[complete.cases(cr.env[,1:4]), ]
check.datasets(cr.sp, cr.env)
## OK
M1 <- cr.sp
M2 <- cr.env
##
#M2<- M2[rowSums(M2[,1:4])>0,]
check.datasets(M1,M2)
## OK
ord.model1 <- decorana(decostand(M1, method="hellinger"))
ord.model1 # CHequeamos el largo del gradiente ambiental al que las especies están respondiendo. Gradientes de 3-4 se consideran "grandes" y se considera buena decisión utilizar método de ordenación que asuma respuestas unimodales de las especies. Es decir, promedios ponderados (e.g., Análisis de correspondencia), y no sumas ponderadas (e.g., Componentes principales).
##
## Call:
## decorana(veg = decostand(M1, method = "hellinger"))
##
## Detrended correspondence analysis with 26 segments.
## Rescaling of axes with 4 iterations.
## Total inertia (scaled Chi-square): 8.7011
##
## DCA1 DCA2 DCA3 DCA4
## Eigenvalues 0.3915 0.2481 0.2313 0.1965
## Additive Eigenvalues 0.3915 0.2420 0.2296 0.1962
## Decorana values 0.4330 0.2978 0.2227 0.2030
## Axis lengths 4.6300 3.8308 2.5308 3.1930
La hipótesis de trabajo es que es que la riqueza de vegetación, combinada con otra variable ambiental que representa la densidad de árboles, explican la distribución de las especies en el actual paisaje. Por lo tanto se necesita comprobar con cuáles variables ambientales, la riqueza de vegetación se puede combinar para considerarlas como los mejores predictores de la matriz de especies.
upr <- cca(M1 ~ cobdos + riqveg + num.arb + alt.arbol,
data= M2) # Full Model
lwr <- cca(M1 ~ 1, data=M2) # Null model
vif.cca(upr) # V.I.F for the full model
## cobdos riqveg num.arb alt.arbol
## 2.544008 2.580015 2.190215 2.138332
# Stepwise (FORWARD) selection
set.seed(123)
mods <- ordistep(lwr, scope = formula(upr), trace = 0)
mods$anova
## Df AIC F Pr(>F)
## + riqveg 1 673.33 3.2013 0.005 **
## + cobdos 1 673.81 1.4857 0.010 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Stepwise (BACKWARD) selection
mods2 <- step(upr,
scope = list(lower = formula(lwr), upper = formula(upr)),
trace = 0,
test = "perm")
mods2$anova
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 NA NA 108 40753.56 675.3339
## 2 - num.arb 1 437.4136 109 41190.97 674.5403
## 3 - alt.arbol 1 465.4762 110 41656.45 673.8101
## 4 - cobdos 1 562.6371 111 42219.09 673.3261
## Stepwise Selection via Adjusted R^2
### 1. Global test
### 2. Radjusted forward selection
finalModel <- __________(lwr, upr, trace = FALSE)
finalModel
# Adjusted R^2
RsquareAdj(mods)
RsquareAdj(mods2)
RsquareAdj(finalModel)
# VIF
vif.cca(finalModel) # estamos "OK"!
# Utilizando tests de permutaciones para calcular significancias
# Modelo completo
upr$call
## cca(formula = M1 ~ cobdos + riqveg + num.arb + alt.arbol, data = M2)
pstat <- permustats(anova(upr))
summary(pstat)
##
## statistic SES mean lower median upper Pr(perm)
## Model 1.7776 7.9754 0.9953 0.9886 1.1628 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Interval (Upper - Lower) = 0.95)
densityplot(pstat)
# Modelo con el que se decidió trabajar
finalModel$call
## cca(formula = M1 ~ riqveg + cobdos, data = M2)
pstat <- permustats(anova(finalModel))
summary(pstat)
##
## statistic SES mean lower median upper Pr(perm)
## Model 2.3505 10.2622 0.9957 0.9797 1.2323 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Interval (Upper - Lower) = 0.95)
densityplot(pstat)
### model significance
#anova(modelo, model = c("direct", "full", "reduced"),by = c("axis", "terms", "margin"))
### Testing by "Axis"
set.seed(1)
anova(finalModel, by="axis")
## Permutation test for cca under reduced model
## Forward tests for axes
## Permutation: free
## Number of permutations: 999
##
## Model: cca(formula = M1 ~ riqveg + cobdos, data = M2)
## Df ChiSquare F Pr(>F)
## CCA1 1 0.2810 3.3878 0.001 ***
## CCA2 1 0.1089 1.3132 0.047 *
## Residual 110 9.1252
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
### Testing ny "Terms"
set.seed(5)
anova(finalModel,
by = "terms")
## Permutation test for cca under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## Model: cca(formula = M1 ~ riqveg + cobdos, data = M2)
## Df ChiSquare F Pr(>F)
## riqveg 1 0.2667 3.2153 0.001 ***
## cobdos 1 0.1233 1.4857 0.011 *
## Residual 110 9.1252
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Calcula la importancia de cada término, cuando este se remueve del modelo global, i.e., “Efecto Marginal” . (utilizar el argumento adecuado)
### Marginal effects (MORE USEFUL !)
set.seed(10)
anova(finalModel, by = "_________")
plot2 <- ordiplot(finalModel, choices = c(1,2), hill = TRUE)
sites.long <- sites.long(plot2, env.data=M2)
species.long <- species.long(plot2)
axis.long <- axis.long(finalModel, choices = c(1,2))
env.fit
para variables respuesta (Spp)spec.envfit <- envfit(plot2, env=M1)
spec.data.envfit <- data.frame(r=spec.envfit$vectors$r, p=spec.envfit$vectors$pvals)
species.long2 <- species.long(plot2, spec.data=spec.data.envfit)
head(species.long2, n=20)
## r p axis1 axis2 labels
## 1 0.032293827 0.128 -0.90757849 0.10364824 1
## 2 0.069526516 0.017 0.18138883 0.13475252 2
## 3 0.003597418 0.823 -0.49405560 0.37623479 3
## 4 0.006687715 0.641 0.01213478 -0.06934233 4
## 5 0.014625186 0.421 0.44751659 0.01774183 5
## 6 0.042612038 0.101 -0.66623732 0.86334608 6
## 7 0.011600890 0.485 -0.77606899 -0.84447654 7
## 8 0.009602421 0.448 -0.49512891 0.72307565 8
## 9 0.013723349 0.424 0.42966045 -0.02074001 9
## 10 0.001483311 0.870 -0.72758542 -0.64544286 10
## 11 0.018491019 0.345 0.03500078 -0.15318092 11
## 12 0.028507020 0.220 -2.26130742 -0.88195472 12
## 13 0.004352336 0.778 0.09162240 0.29007577 13
## 14 0.113732535 0.022 -2.08862912 -0.61151892 14
## 15 0.202765800 0.001 -2.63746042 -1.30642803 15
## 16 0.048874475 0.078 -0.54662864 0.17255316 16
## 17 0.008529457 0.544 -0.38401839 0.30920651 17
## 18 0.014093521 0.428 0.35104346 -0.03299250 18
## 19 0.004431857 0.740 0.70742714 -0.11452255 19
## 20 0.002282268 0.781 0.06258033 0.43422378 20
envfit()
para las
variables ambientales. Necesitamos obtener los ‘biplot
scores’.## Vector Fit
vectors<- data.frame(cbind(vector= rownames(data.frame(plot2$biplot)), axis1=data.frame(plot2$biplot)[,1] , axis2=data.frame(plot2$biplot)[,2] ) )
vectors$vector <- sapply(vectors[,1], as.factor)
vectors$axis1 <- as.numeric(vectors$axis1)
vectors$axis2 <- as.numeric(vectors$axis2)
vectors
## vector axis1 axis2
## 1 riqveg -0.9575183 -0.2883724
## 2 cobdos -0.8414543 0.5403283
#vectors.envfit <- envfit(plot2, env= M2[,c(3,14)])
#vectors.long3 <- vectorfit.long()
#vectors.long3
### Spec r >= 0%
species.long3 <- species.long2[species.long2$r >= 0, ]
head(species.long3, n=20)
## r p axis1 axis2 labels
## 1 0.032293827 0.128 -0.90757849 0.10364824 1
## 2 0.069526516 0.017 0.18138883 0.13475252 2
## 3 0.003597418 0.823 -0.49405560 0.37623479 3
## 4 0.006687715 0.641 0.01213478 -0.06934233 4
## 5 0.014625186 0.421 0.44751659 0.01774183 5
## 6 0.042612038 0.101 -0.66623732 0.86334608 6
## 7 0.011600890 0.485 -0.77606899 -0.84447654 7
## 8 0.009602421 0.448 -0.49512891 0.72307565 8
## 9 0.013723349 0.424 0.42966045 -0.02074001 9
## 10 0.001483311 0.870 -0.72758542 -0.64544286 10
## 11 0.018491019 0.345 0.03500078 -0.15318092 11
## 12 0.028507020 0.220 -2.26130742 -0.88195472 12
## 13 0.004352336 0.778 0.09162240 0.29007577 13
## 14 0.113732535 0.022 -2.08862912 -0.61151892 14
## 15 0.202765800 0.001 -2.63746042 -1.30642803 15
## 16 0.048874475 0.078 -0.54662864 0.17255316 16
## 17 0.008529457 0.544 -0.38401839 0.30920651 17
## 18 0.014093521 0.428 0.35104346 -0.03299250 18
## 19 0.004431857 0.740 0.70742714 -0.11452255 19
## 20 0.002282268 0.781 0.06258033 0.43422378 20
### Var Ambientales r >= 0
#vectors.long3 <- vectors.long3[vectors.long3$r >= 0, ]
#vectors.long3
#plot2 <- ordiplot(ord.model1, choices=c(1,2))
#plot2
#usosuelo.ellipses <- ordiellipse(plot2, groups=M2$UsoDSuelo, display="sites", kind="se")
#usosuelo.ellipses.long <- ordiellipse.long(usosuelo.ellipses, grouping.name="UsoDeSuelo")
#plot2 <- ordiplot(ord.model1, choices=c(1,2))
#plot2
#centroids.long1 <- centroids.long(sites.long, grouping=sites.long$UsoDSuelo, FUN = mean, centroids.only = TRUE)
f = 1
plotgg2 <- ggplot() +
geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +
xlab(axis.long[1, "label"]) +
ylab(axis.long[2, "label"]) +
scale_x_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
scale_y_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
#geom_point(data=sites.long,
# aes(x=axis1, y=axis2),
# size=3, shape=1) +
#geom_text_repel(data=sites.long,aes(x=axis1, y=axis2, label=labels),colour="black") +
geom_point(data=species.long2,aes(x=axis1*f, y=axis2*f)) +
#geom_segment(data=species.long3,aes(x=0, y=0, xend=axis1*f, yend=axis2*f),colour="red", linewidth=0.7, arrow=arrow(angle=20)) +
geom_text_repel(data=species.long3,aes(x=axis1*f, y=axis2*f, label=labels),colour="red", max.overlaps = 10) +
geom_segment(data=vectors,aes(x=0, y=0, xend=axis1*f*4, yend=axis2*f*4),colour="blue", linewidth=0.7, arrow=arrow(angle=20)) +
geom_text_repel(data=vectors,aes(x=axis1*f*4, y= axis2*f*4, label=vector),colour="blue") +
#geom_polygon(data=usosuelo.ellipses.long,
# aes(x=axis1, y=axis2, colour= usosuelo.ellipses.long$UsoDeSuelo,
# fill=after_scale(alpha(colour, 0.2))),
# linewidth=0.2, show.legend=TRUE) +
#geom_point(data=centroids.long1,
# aes(x=axis1c,y=axis2c, col=centroids.long1$Centroid)) +
# ggsci::scale_colour_npg() +
coord_fixed(ratio=1)
plotgg2
## Warning: ggrepel: 19 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
plot2 <- ordiplot(finalModel, choices = c(1,2), display = c("sites", "bp"), scaling=2)
ordisurf(plot2, y=as.numeric(M1[,87]), add=TRUE, col="orange", main= "'87' response to environmental gradient") # respuesta de la spp "#87". Abundancia más alta esperada a valores intermedios de riqueza de vegetación y cobertura de dosel
plot2 <- ordiplot(finalModel, choices = c(1,2), display = c("sites", "bp"), scaling=2)
ordisurf(plot2, y=as.numeric(M1[,_____]), add=TRUE, col="orange", main= "'______' response to environmental gradient")
plot2 <- ordiplot(finalModel, choices = c(1,2), display = c("sites", "bp"), scaling=2)
ordisurf(plot2, y=as.numeric(M1[,_____]), add=TRUE, col="orange", main= "'______' response to environmental gradient")