library(BiodiversityR) # also loads vegan
library(poppr) # also loads adegenet
library(ggplot2)
library(ggsci)
library(ggforce)
library(dplyr)
library(ggrepel)
Jombart et al. 2010 described their methodology of Discriminant Analysis of Principal Components(DAPC) for the analysis of genetically structured populations as a new methodological approach that retained all the assets of Discriminant Analysis, whereas not being burdened by its limitation of requiring the number of variables (alleles) to be less than the number of observations (individuals).
In this document, I demonstrate that the same features (allowing for more variables than individuals) apply to the methodology of Redundancy Analysis. This widely used method from the field of community ecology does not require a prior step of principal components analysis and has some other advantages, such as a possibility to use several explanatory variables, allowing for variance partitioning methods, and it can also be directly interpreted in terms of Analysis of Molecular Variance (such as the distances among centroids directly relating to Sums-of-Squares). More importantly, Redundancy Analysis was able to retrieve the same ordination configurations as DAPC.
Methods of using redundancy analysis with the vegan and BiodiversityR packages, including data preparation steps and publication-ready plotting methods via ggplot2, are shown here with the same simulated data sets that were used when introducing the DAPC method.
The dapcIllus data in adegenet contains the simulated data sets that were used in the article by Jombart et al. 2010 of Discriminant analysis of principal components: a new method for the analysis of genetically structured populations.
These simulated data sets are in the following configurations:
a: an island model with 6 populations
b: a hierarchical island model with 6 populations (3, 2, 1)
c: a one-dimensional stepping-stone model with 6 populations, a boundary, and 6 other populations
d: a one-dimenstional stepping stone model with 24 populations
data(dapcIllus)
attach(dapcIllus) # attached the 4 data sets as a, b, c and d
str(a)
## Formal class 'genind' [package ""] with 11 slots
## ..@ tab : int [1:600, 1:140] 1 1 1 2 1 2 1 1 2 1 ...
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : chr [1:600] "1" "2" "3" "4" ...
## .. .. ..$ : chr [1:140] "loc-1.03" "loc-1.19" "loc-2.01" "loc-2.04" ...
## ..@ loc.fac : Factor w/ 30 levels "L01","L02","L03",..: 1 1 2 2 2 2 2 3 3 3 ...
## ..@ loc.n.all: Named int [1:30] 2 5 4 3 8 3 3 5 7 5 ...
## .. ..- attr(*, "names")= chr [1:30] "L01" "L02" "L03" "L04" ...
## ..@ all.names:List of 30
## .. ..$ L01: Named chr [1:2] "03" "19"
## .. .. ..- attr(*, "names")= chr [1:2] "1" "2"
## .. ..$ L02: Named chr [1:5] "01" "04" "40" "41" ...
## .. .. ..- attr(*, "names")= chr [1:5] "1" "2" "3" "4" ...
## .. ..$ L03: Named chr [1:4] "08" "10" "38" "45"
## .. .. ..- attr(*, "names")= chr [1:4] "1" "2" "3" "4"
## .. ..$ L04: Named chr [1:3] "22" "36" "49"
## .. .. ..- attr(*, "names")= chr [1:3] "1" "2" "3"
## .. ..$ L05: Named chr [1:8] "04" "08" "15" "17" ...
## .. .. ..- attr(*, "names")= chr [1:8] "1" "2" "3" "4" ...
## .. ..$ L06: Named chr [1:3] "14" "15" "33"
## .. .. ..- attr(*, "names")= chr [1:3] "1" "2" "3"
## .. ..$ L07: Named chr [1:3] "06" "37" "40"
## .. .. ..- attr(*, "names")= chr [1:3] "1" "2" "3"
## .. ..$ L08: Named chr [1:5] "10" "21" "33" "44" ...
## .. .. ..- attr(*, "names")= chr [1:5] "1" "2" "3" "4" ...
## .. ..$ L09: Named chr [1:7] "01" "13" "24" "25" ...
## .. .. ..- attr(*, "names")= chr [1:7] "1" "2" "3" "4" ...
## .. ..$ L10: Named chr [1:5] "03" "22" "30" "37" ...
## .. .. ..- attr(*, "names")= chr [1:5] "1" "2" "3" "4" ...
## .. ..$ L11: Named chr [1:8] "05" "11" "16" "21" ...
## .. .. ..- attr(*, "names")= chr [1:8] "1" "2" "3" "4" ...
## .. ..$ L12: Named chr [1:4] "01" "09" "20" "21"
## .. .. ..- attr(*, "names")= chr [1:4] "1" "2" "3" "4"
## .. ..$ L13: Named chr [1:3] "34" "44" "47"
## .. .. ..- attr(*, "names")= chr [1:3] "1" "2" "3"
## .. ..$ L14: Named chr [1:3] "30" "48" "50"
## .. .. ..- attr(*, "names")= chr [1:3] "1" "2" "3"
## .. ..$ L15: Named chr [1:2] "21" "30"
## .. .. ..- attr(*, "names")= chr [1:2] "1" "2"
## .. ..$ L16: Named chr [1:6] "10" "12" "14" "25" ...
## .. .. ..- attr(*, "names")= chr [1:6] "1" "2" "3" "4" ...
## .. ..$ L17: Named chr [1:4] "06" "23" "38" "46"
## .. .. ..- attr(*, "names")= chr [1:4] "1" "2" "3" "4"
## .. ..$ L18: Named chr [1:4] "02" "25" "49" "50"
## .. .. ..- attr(*, "names")= chr [1:4] "1" "2" "3" "4"
## .. ..$ L19: Named chr [1:6] "01" "13" "37" "43" ...
## .. .. ..- attr(*, "names")= chr [1:6] "1" "2" "3" "4" ...
## .. ..$ L20: Named chr [1:6] "11" "12" "17" "20" ...
## .. .. ..- attr(*, "names")= chr [1:6] "1" "2" "3" "4" ...
## .. ..$ L21: Named chr [1:8] "04" "15" "17" "31" ...
## .. .. ..- attr(*, "names")= chr [1:8] "1" "2" "3" "4" ...
## .. ..$ L22: Named chr [1:3] "20" "24" "49"
## .. .. ..- attr(*, "names")= chr [1:3] "1" "2" "3"
## .. ..$ L23: Named chr [1:5] "11" "25" "38" "43" ...
## .. .. ..- attr(*, "names")= chr [1:5] "1" "2" "3" "4" ...
## .. ..$ L24: Named chr [1:3] "02" "12" "43"
## .. .. ..- attr(*, "names")= chr [1:3] "1" "2" "3"
## .. ..$ L25: Named chr [1:5] "18" "23" "28" "30" ...
## .. .. ..- attr(*, "names")= chr [1:5] "1" "2" "3" "4" ...
## .. ..$ L26: Named chr [1:4] "01" "02" "23" "30"
## .. .. ..- attr(*, "names")= chr [1:4] "1" "2" "3" "4"
## .. ..$ L27: Named chr [1:5] "03" "06" "22" "24" ...
## .. .. ..- attr(*, "names")= chr [1:5] "1" "2" "3" "4" ...
## .. ..$ L28: Named chr [1:7] "07" "13" "14" "20" ...
## .. .. ..- attr(*, "names")= chr [1:7] "1" "2" "3" "4" ...
## .. ..$ L29: Named chr [1:4] "14" "17" "30" "44"
## .. .. ..- attr(*, "names")= chr [1:4] "1" "2" "3" "4"
## .. ..$ L30: Named chr [1:5] "26" "34" "42" "49" ...
## .. .. ..- attr(*, "names")= chr [1:5] "1" "2" "3" "4" ...
## ..@ ploidy : int [1:600] 2 2 2 2 2 2 2 2 2 2 ...
## ..@ type : chr "codom"
## ..@ other : NULL
## ..@ call : language read.fstat(file = file, missing = missing, quiet = quiet)
## ..@ pop : Factor w/ 6 levels "P1","P2","P3",..: 1 1 1 1 1 1 1 1 1 1 ...
## ..@ strata : NULL
## ..@ hierarchy: NULL
head(a$tab)
## loc-1.03 loc-1.19 loc-2.01 loc-2.04 loc-2.40 loc-2.41 loc-2.44 loc-3.08
## 1 1 1 0 0 1 0 1 0
## 2 1 1 0 0 2 0 0 0
## 3 1 1 0 0 1 0 1 0
## 4 2 0 0 0 0 0 2 0
## 5 1 1 0 0 1 0 1 0
## 6 2 0 0 0 1 0 1 1
## loc-3.10 loc-3.38 loc-3.45 loc-4.22 loc-4.36 loc-4.49 loc-5.04 loc-5.08
## 1 2 0 0 2 0 0 0 0
## 2 2 0 0 1 0 1 0 0
## 3 2 0 0 1 0 1 0 0
## 4 0 0 2 2 0 0 0 0
## 5 1 0 1 1 0 1 0 0
## 6 1 0 0 1 0 1 0 0
## loc-5.15 loc-5.17 loc-5.18 loc-5.30 loc-5.41 loc-5.44 loc-6.14 loc-6.15
## 1 1 0 0 0 1 0 2 0
## 2 0 0 0 0 1 1 2 0
## 3 0 0 0 0 2 0 2 0
## 4 0 0 0 0 2 0 2 0
## 5 0 0 0 0 1 1 2 0
## 6 0 0 0 0 1 1 2 0
## loc-6.33 loc-7.06 loc-7.37 loc-7.40 loc-8.10 loc-8.21 loc-8.33 loc-8.44
## 1 0 1 1 0 0 1 1 0
## 2 0 1 1 0 0 2 0 0
## 3 0 0 2 0 0 0 2 0
## 4 0 0 2 0 0 2 0 0
## 5 0 1 1 0 0 1 1 0
## 6 0 1 1 0 1 0 1 0
## loc-8.48 loc-9.01 loc-9.13 loc-9.24 loc-9.25 loc-9.35 loc-9.40 loc-9.41
## 1 0 1 0 0 0 1 0 0
## 2 0 1 0 0 0 1 0 0
## 3 0 2 0 0 0 0 0 0
## 4 0 2 0 0 0 0 0 0
## 5 0 2 0 0 0 0 0 0
## 6 0 2 0 0 0 0 0 0
## loc-10.03 loc-10.22 loc-10.30 loc-10.37 loc-10.49 loc-11.05 loc-11.11
## 1 0 2 0 0 0 0 0
## 2 0 2 0 0 0 1 0
## 3 1 1 0 0 0 1 0
## 4 1 1 0 0 0 0 0
## 5 1 1 0 0 0 2 0
## 6 0 2 0 0 0 2 0
## loc-11.16 loc-11.21 loc-11.32 loc-11.37 loc-11.39 loc-11.41 loc-12.01
## 1 0 0 0 0 1 1 1
## 2 0 0 0 0 1 0 1
## 3 0 0 0 0 1 0 1
## 4 0 0 0 0 1 1 1
## 5 0 0 0 0 0 0 1
## 6 0 0 0 0 0 0 1
## loc-12.09 loc-12.20 loc-12.21 loc-13.34 loc-13.44 loc-13.47 loc-14.30
## 1 0 0 1 2 0 0 2
## 2 0 0 1 2 0 0 2
## 3 0 0 1 2 0 0 2
## 4 0 0 1 2 0 0 2
## 5 0 0 1 2 0 0 1
## 6 0 0 1 2 0 0 2
## loc-14.48 loc-14.50 loc-15.21 loc-15.30 loc-16.10 loc-16.12 loc-16.14
## 1 0 0 1 1 0 0 0
## 2 0 0 0 2 0 0 0
## 3 0 0 0 2 0 0 0
## 4 0 0 0 2 0 0 0
## 5 0 1 0 2 0 0 0
## 6 0 0 0 2 0 0 0
## loc-16.25 loc-16.34 loc-16.39 loc-17.06 loc-17.23 loc-17.38 loc-17.46
## 1 0 2 0 0 0 2 0
## 2 1 1 0 0 0 2 0
## 3 1 1 0 0 0 2 0
## 4 2 0 0 0 0 2 0
## 5 2 0 0 0 0 2 0
## 6 2 0 0 0 0 2 0
## loc-18.02 loc-18.25 loc-18.49 loc-18.50 loc-19.01 loc-19.13 loc-19.37
## 1 2 0 0 0 0 0 1
## 2 2 0 0 0 0 0 0
## 3 2 0 0 0 0 0 2
## 4 2 0 0 0 0 0 2
## 5 2 0 0 0 0 0 0
## 6 2 0 0 0 0 0 0
## loc-19.43 loc-19.44 loc-19.49 loc-20.11 loc-20.12 loc-20.17 loc-20.20
## 1 1 0 0 0 0 0 2
## 2 2 0 0 0 0 0 2
## 3 0 0 0 0 0 0 2
## 4 0 0 0 0 0 0 2
## 5 2 0 0 0 0 0 2
## 6 2 0 0 0 0 0 2
## loc-20.28 loc-20.34 loc-21.04 loc-21.15 loc-21.17 loc-21.31 loc-21.39
## 1 0 0 0 1 0 0 0
## 2 0 0 0 0 1 0 0
## 3 0 0 0 1 1 0 0
## 4 0 0 0 1 0 0 1
## 5 0 0 0 0 1 0 1
## 6 0 0 0 1 1 0 0
## loc-21.42 loc-21.43 loc-21.50 loc-22.20 loc-22.24 loc-22.49 loc-23.11
## 1 1 0 0 0 2 0 1
## 2 1 0 0 0 2 0 0
## 3 0 0 0 0 2 0 0
## 4 0 0 0 0 0 2 2
## 5 0 0 0 0 1 1 0
## 6 0 0 0 0 1 1 1
## loc-23.25 loc-23.38 loc-23.43 loc-23.46 loc-24.02 loc-24.12 loc-24.43
## 1 0 0 1 0 0 1 1
## 2 2 0 0 0 0 2 0
## 3 2 0 0 0 0 1 1
## 4 0 0 0 0 0 0 2
## 5 2 0 0 0 0 1 1
## 6 1 0 0 0 0 1 1
## loc-25.18 loc-25.23 loc-25.28 loc-25.30 loc-25.34 loc-26.01 loc-26.02
## 1 0 0 0 2 0 0 2
## 2 0 0 1 1 0 0 2
## 3 0 0 1 1 0 0 2
## 4 0 0 1 1 0 0 2
## 5 0 0 1 1 0 0 2
## 6 0 0 2 0 0 0 2
## loc-26.23 loc-26.30 loc-27.03 loc-27.06 loc-27.22 loc-27.24 loc-27.34
## 1 0 0 0 0 2 0 0
## 2 0 0 0 0 2 0 0
## 3 0 0 0 0 2 0 0
## 4 0 0 2 0 0 0 0
## 5 0 0 0 0 2 0 0
## 6 0 0 0 0 2 0 0
## loc-28.07 loc-28.13 loc-28.14 loc-28.20 loc-28.27 loc-28.42 loc-28.47
## 1 0 1 0 0 0 1 0
## 2 0 1 0 0 0 1 0
## 3 0 1 0 0 0 1 0
## 4 0 0 0 0 0 2 0
## 5 0 2 0 0 0 0 0
## 6 0 1 0 0 0 1 0
## loc-29.14 loc-29.17 loc-29.30 loc-29.44 loc-30.26 loc-30.34 loc-30.42
## 1 1 0 0 1 0 1 0
## 2 0 1 0 1 0 0 0
## 3 1 0 1 0 0 0 0
## 4 0 0 2 0 0 0 0
## 5 1 0 1 0 0 0 0
## 6 1 0 0 1 0 0 0
## loc-30.49 loc-30.50
## 1 1 0
## 2 2 0
## 3 2 0
## 4 2 0
## 5 2 0
## 6 2 0
str(a$tab)
## int [1:600, 1:140] 1 1 1 2 1 2 1 1 2 1 ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:600] "1" "2" "3" "4" ...
## ..$ : chr [1:140] "loc-1.03" "loc-1.19" "loc-2.01" "loc-2.04" ...
As part of the procedure when the cluster structure is not known a priori, a K-means process is used to cluster individuals in different groups. This method is implemented in the data(dapcIllus) example via the adegenet::find.cluster function.
clust.a <- find.clusters(a, n.pca=100, n.clust=6)
unique(clust.a$grp)
## [1] 6 4 5 3 2 1
## Levels: 1 2 3 4 5 6
clust.b <- find.clusters(b, n.pca=100, n.clust=6)
unique(clust.b$grp)
## [1] 1 3 5 4 6 2
## Levels: 1 2 3 4 5 6
new.sequence <- unique(clust.b$grp)
levels(clust.b$grp) <-list("1"=new.sequence[1],
"2"=new.sequence[2],
"3"=new.sequence[3],
"4"=new.sequence[4],
"5"=new.sequence[5],
"6"=new.sequence[6])
unique(clust.b$grp)
## [1] 1 2 3 4 5 6
## Levels: 1 2 3 4 5 6
clust.c <- find.clusters(c, n.pca=100, n.clust=12)
unique(clust.c$grp)
## [1] 6 4 10 8 5 12 9 1 11 2 3 7
## Levels: 1 2 3 4 5 6 7 8 9 10 11 12
clust.d <- find.clusters(d, n.pca=100, n.clust=24)
unique(clust.c$grp)
## [1] 6 4 10 8 5 12 9 1 11 2 3 7
## Levels: 1 2 3 4 5 6 7 8 9 10 11 12
Whereas the example pre-selected the number of clusters and axes, it is also possible to search for the number of clusters by setting the argument n.clust to NULL - and this would be the expected situation and rationale for using a K-means procedure in the first place. Among the other argument options, the documentation of adegenet::find.clusters recommends to set its argument criterion to ‘diffNgroup’. I opted to set max.n.clust to 30. A user can set argument choose.n.clust to TRUE to be asked for a visual inspection of the graphs.
set.seed(1)
clust.a1 <- find.clusters(a, n.pca=100, n.clust=NULL,
criterion="diffNgroup", choose.n.clust=FALSE,
max.n.clust=30, n.start=5)
unique(clust.a1$grp)
## [1] 3 4 1 6 2 5
## Levels: 1 2 3 4 5 6
# Use this command (default of choose.n.clust=TRUE) to inspect the graph
# find.clusters(a, n.pca=100, n.clust=NULL, criterion="diffNgroup", max.n.clust=30, n.start=5)
set.seed(1)
clust.b1 <- find.clusters(b, n.pca=100, n.clust=NULL,
criterion="diffNgroup", choose.n.clust=FALSE,
max.n.clust=30, n.start=5)
unique(clust.b1$grp)
## [1] 4 2 6 1 3 5
## Levels: 1 2 3 4 5 6
# Use this command to inspect the graph
# find.clusters(b, n.pca=100, n.clust=NULL, criterion="diffNgroup", max.n.clust=30, n.start=5)
set.seed(1)
clust.c1 <- find.clusters(c, n.pca=100, n.clust=NULL,
criterion="diffNgroup", choose.n.clust=FALSE,
max.n.clust=30, n.start=5)
unique(clust.c1$grp)
## [1] 1 2
## Levels: 1 2
# Use this command to inspect the graph
# find.clusters(c, n.pca=100, n.clust=NULL, criterion="diffNgroup", max.n.clust=30, n.start=5)
set.seed(1)
clust.d1 <- find.clusters(d, n.pca=100, n.clust=NULL,
criterion="diffNgroup", choose.n.clust=FALSE,
max.n.clust=30, n.start=5)
unique(clust.d1$grp)
## [1] 1 2 3
## Levels: 1 2 3
# Use this command to inspect the graph
# find.clusters(d, n.pca=100, n.clust=NULL, criterion="diffNgroup", max.n.clust=30, n.start=5)
The example is followed with the cluster structure with the pre-defined number of clusters. Note that argument n.da is set to the number of clusters - 1.
dapc.a <- dapc(a, pop=clust.a$grp, n.pca=100, n.da=5)
dapc.b <- dapc(b, pop=clust.b$grp, n.pca=100, n.da=5)
dapc.c <- dapc(c, pop=clust.c$grp, n.pca=100, n.da=11)
dapc.d <- dapc(d, pop=clust.d$grp, n.pca=100, n.da=23)
dapc.a
## #################################################
## # Discriminant Analysis of Principal Components #
## #################################################
## class: dapc
## $call: dapc.genind(x = a, pop = clust.a$grp, n.pca = 100, n.da = 5)
##
## $n.pca: 100 first PCs of PCA used
## $n.da: 5 discriminant functions saved
## $var (proportion of conserved variance): 1
##
## $eig (eigenvalues): 1300 989.6 749.6 635.6 466.2 vector length content
## 1 $eig 5 eigenvalues
## 2 $grp 600 prior group assignment
## 3 $prior 6 prior group probabilities
## 4 $assign 600 posterior group assignment
## 5 $pca.cent 140 centring vector of PCA
## 6 $pca.norm 140 scaling vector of PCA
## 7 $pca.eig 109 eigenvalues of PCA
##
## data.frame nrow ncol content
## 1 $tab 600 100 retained PCs of PCA
## 2 $means 6 100 group means
## 3 $loadings 100 5 loadings of variables
## 4 $ind.coord 600 5 coordinates of individuals (principal components)
## 5 $grp.coord 6 5 coordinates of groups
## 6 $posterior 600 6 posterior membership probabilities
## 7 $pca.loadings 140 100 PCA loadings of original variables
## 8 $var.contr 140 5 contribution of original variables
summary(dapc.a)
## $n.dim
## [1] 5
##
## $n.pop
## [1] 6
##
## $assign.prop
## [1] 0.995
##
## $assign.per.pop
## 1 2 3 4 5 6
## 1.0000000 0.9896907 1.0000000 0.9904762 1.0000000 0.9897959
##
## $prior.grp.size
##
## 1 2 3 4 5 6
## 102 97 99 105 99 98
##
## $post.grp.size
##
## 1 2 3 4 5 6
## 102 97 100 104 99 98
scatter(dapc.a)
dapc.b
## #################################################
## # Discriminant Analysis of Principal Components #
## #################################################
## class: dapc
## $call: dapc.genind(x = b, pop = clust.b$grp, n.pca = 100, n.da = 5)
##
## $n.pca: 100 first PCs of PCA used
## $n.da: 5 discriminant functions saved
## $var (proportion of conserved variance): 0.996
##
## $eig (eigenvalues): 750.5 688.7 336.5 308.3 243.2 vector length content
## 1 $eig 5 eigenvalues
## 2 $grp 600 prior group assignment
## 3 $prior 6 prior group probabilities
## 4 $assign 600 posterior group assignment
## 5 $pca.cent 163 centring vector of PCA
## 6 $pca.norm 163 scaling vector of PCA
## 7 $pca.eig 133 eigenvalues of PCA
##
## data.frame nrow ncol content
## 1 $tab 600 100 retained PCs of PCA
## 2 $means 6 100 group means
## 3 $loadings 100 5 loadings of variables
## 4 $ind.coord 600 5 coordinates of individuals (principal components)
## 5 $grp.coord 6 5 coordinates of groups
## 6 $posterior 600 6 posterior membership probabilities
## 7 $pca.loadings 163 100 PCA loadings of original variables
## 8 $var.contr 163 5 contribution of original variables
summary(dapc.b)
## $n.dim
## [1] 5
##
## $n.pop
## [1] 6
##
## $assign.prop
## [1] 0.985
##
## $assign.per.pop
## 1 2 3 4 5 6
## 0.9907407 0.9800000 0.9750000 0.9861111 0.9898990 0.9900990
##
## $prior.grp.size
##
## 1 2 3 4 5 6
## 108 100 120 72 99 101
##
## $post.grp.size
##
## 1 2 3 4 5 6
## 109 99 117 74 101 100
scatter(dapc.b)
dapc.c
## #################################################
## # Discriminant Analysis of Principal Components #
## #################################################
## class: dapc
## $call: dapc.genind(x = c, pop = clust.c$grp, n.pca = 100, n.da = 11)
##
## $n.pca: 100 first PCs of PCA used
## $n.da: 11 discriminant functions saved
## $var (proportion of conserved variance): 0.998
##
## $eig (eigenvalues): 23390 2106 1620 507.7 440.7 ...
##
## vector length content
## 1 $eig 11 eigenvalues
## 2 $grp 600 prior group assignment
## 3 $prior 12 prior group probabilities
## 4 $assign 600 posterior group assignment
## 5 $pca.cent 160 centring vector of PCA
## 6 $pca.norm 160 scaling vector of PCA
## 7 $pca.eig 130 eigenvalues of PCA
##
## data.frame nrow ncol content
## 1 $tab 600 100 retained PCs of PCA
## 2 $means 12 100 group means
## 3 $loadings 100 11 loadings of variables
## 4 $ind.coord 600 11 coordinates of individuals (principal components)
## 5 $grp.coord 12 11 coordinates of groups
## 6 $posterior 600 12 posterior membership probabilities
## 7 $pca.loadings 160 100 PCA loadings of original variables
## 8 $var.contr 160 11 contribution of original variables
summary(dapc.c)
## $n.dim
## [1] 11
##
## $n.pop
## [1] 12
##
## $assign.prop
## [1] 0.9933333
##
## $assign.per.pop
## 1 2 3 4 5 6 7 8
## 1.0000000 1.0000000 0.9777778 0.9791667 0.9800000 1.0000000 1.0000000 1.0000000
## 9 10 11 12
## 1.0000000 1.0000000 0.9545455 1.0000000
##
## $prior.grp.size
##
## 1 2 3 4 5 6 7 8 9 10 11 12
## 47 31 45 48 50 102 55 49 53 51 22 47
##
## $post.grp.size
##
## 1 2 3 4 5 6 7 8 9 10 11 12
## 47 32 44 47 49 103 56 50 53 51 21 47
scatter(dapc.c)
dapc.d
## #################################################
## # Discriminant Analysis of Principal Components #
## #################################################
## class: dapc
## $call: dapc.genind(x = d, pop = clust.d$grp, n.pca = 100, n.da = 23)
##
## $n.pca: 100 first PCs of PCA used
## $n.da: 23 discriminant functions saved
## $var (proportion of conserved variance): 0.992
##
## $eig (eigenvalues): 11080 2720 799.2 500.4 367.1 ...
##
## vector length content
## 1 $eig 23 eigenvalues
## 2 $grp 600 prior group assignment
## 3 $prior 24 prior group probabilities
## 4 $assign 600 posterior group assignment
## 5 $pca.cent 185 centring vector of PCA
## 6 $pca.norm 185 scaling vector of PCA
## 7 $pca.eig 154 eigenvalues of PCA
##
## data.frame nrow ncol content
## 1 $tab 600 100 retained PCs of PCA
## 2 $means 24 100 group means
## 3 $loadings 100 23 loadings of variables
## 4 $ind.coord 600 23 coordinates of individuals (principal components)
## 5 $grp.coord 24 23 coordinates of groups
## 6 $posterior 600 24 posterior membership probabilities
## 7 $pca.loadings 185 100 PCA loadings of original variables
## 8 $var.contr 185 23 contribution of original variables
summary(dapc.d)
## $n.dim
## [1] 23
##
## $n.pop
## [1] 24
##
## $assign.prop
## [1] 1
##
## $assign.per.pop
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##
## $prior.grp.size
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
## 38 23 24 12 25 23 28 26 23 27 21 21 40 17 24 51 20 26 25 17 26 26 10 27
##
## $post.grp.size
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
## 38 23 24 12 25 23 28 26 23 27 21 21 40 17 24 51 20 26 25 17 26 26 10 27
scatter(dapc.d)
Similar as in a publication that showed the similarities between Analysis of Moleculare Variance (AMOVA) and Multivariate Analysis of Variance Using Distance Matrices and (distance-based) redundancy analysis via the vegan and BiodiversityR packages, two data sets need to be prepared, the community data set and the environmental data set. This procedure is not that difficult (and actually by far the longest parts of the scripts are used to create names of individuals that reflect their population; this could for instance make it easier in subsequent research such as procedures to identify potential recent migrants as shown also in the document above).
For data a:
a.comm <- data.frame(as.matrix(a$tab))
str(a.comm)
## 'data.frame': 600 obs. of 140 variables:
## $ loc.1.03 : int 1 1 1 2 1 2 1 1 2 1 ...
## $ loc.1.19 : int 1 1 1 0 1 0 1 1 0 1 ...
## $ loc.2.01 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.2.04 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.2.40 : int 1 2 1 0 1 1 1 1 1 1 ...
## $ loc.2.41 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.2.44 : int 1 0 1 2 1 1 1 1 1 1 ...
## $ loc.3.08 : int 0 0 0 0 0 1 0 0 0 1 ...
## $ loc.3.10 : int 2 2 2 0 1 1 2 1 1 1 ...
## $ loc.3.38 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.3.45 : int 0 0 0 2 1 0 0 1 1 0 ...
## $ loc.4.22 : int 2 1 1 2 1 1 1 1 0 1 ...
## $ loc.4.36 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.4.49 : int 0 1 1 0 1 1 1 1 2 1 ...
## $ loc.5.04 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.5.08 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.5.15 : int 1 0 0 0 0 0 0 0 0 0 ...
## $ loc.5.17 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.5.18 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.5.30 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.5.41 : int 1 1 2 2 1 1 2 1 2 2 ...
## $ loc.5.44 : int 0 1 0 0 1 1 0 1 0 0 ...
## $ loc.6.14 : int 2 2 2 2 2 2 2 2 2 1 ...
## $ loc.6.15 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.6.33 : int 0 0 0 0 0 0 0 0 0 1 ...
## $ loc.7.06 : int 1 1 0 0 1 1 1 0 2 0 ...
## $ loc.7.37 : int 1 1 2 2 1 1 1 2 0 2 ...
## $ loc.7.40 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.8.10 : int 0 0 0 0 0 1 1 1 0 0 ...
## $ loc.8.21 : int 1 2 0 2 1 0 1 1 0 1 ...
## $ loc.8.33 : int 1 0 2 0 1 1 0 0 2 1 ...
## $ loc.8.44 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.8.48 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.9.01 : int 1 1 2 2 2 2 1 1 1 0 ...
## $ loc.9.13 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.9.24 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.9.25 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.9.35 : int 1 1 0 0 0 0 1 1 1 2 ...
## $ loc.9.40 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.9.41 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.10.03: int 0 0 1 1 1 0 1 1 1 1 ...
## $ loc.10.22: int 2 2 1 1 1 2 1 1 1 1 ...
## $ loc.10.30: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.10.37: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.10.49: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.11.05: int 0 1 1 0 2 2 2 1 2 1 ...
## $ loc.11.11: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.11.16: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.11.21: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.11.32: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.11.37: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.11.39: int 1 1 1 1 0 0 0 1 0 1 ...
## $ loc.11.41: int 1 0 0 1 0 0 0 0 0 0 ...
## $ loc.12.01: int 1 1 1 1 1 1 1 2 0 2 ...
## $ loc.12.09: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.12.20: int 0 0 0 0 0 0 1 0 0 0 ...
## $ loc.12.21: int 1 1 1 1 1 1 0 0 2 0 ...
## $ loc.13.34: int 2 2 2 2 2 2 2 2 2 2 ...
## $ loc.13.44: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.13.47: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.14.30: int 2 2 2 2 1 2 2 2 2 2 ...
## $ loc.14.48: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.14.50: int 0 0 0 0 1 0 0 0 0 0 ...
## $ loc.15.21: int 1 0 0 0 0 0 1 0 0 1 ...
## $ loc.15.30: int 1 2 2 2 2 2 1 2 2 1 ...
## $ loc.16.10: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.16.12: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.16.14: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.16.25: int 0 1 1 2 2 2 1 1 1 1 ...
## $ loc.16.34: int 2 1 1 0 0 0 1 1 1 1 ...
## $ loc.16.39: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.17.06: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.17.23: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.17.38: int 2 2 2 2 2 2 2 2 2 2 ...
## $ loc.17.46: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.18.02: int 2 2 2 2 2 2 2 2 2 2 ...
## $ loc.18.25: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.18.49: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.18.50: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.19.01: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.19.13: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.19.37: int 1 0 2 2 0 0 1 0 1 0 ...
## $ loc.19.43: int 1 2 0 0 2 2 1 2 1 2 ...
## $ loc.19.44: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.19.49: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.20.11: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.20.12: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.20.17: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.20.20: int 2 2 2 2 2 2 2 2 2 2 ...
## $ loc.20.28: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.20.34: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.21.04: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.21.15: int 1 0 1 1 0 1 1 1 1 0 ...
## $ loc.21.17: int 0 1 1 0 1 1 0 1 0 1 ...
## $ loc.21.31: int 0 0 0 0 0 0 1 0 0 0 ...
## $ loc.21.39: int 0 0 0 1 1 0 0 0 0 0 ...
## $ loc.21.42: int 1 1 0 0 0 0 0 0 1 1 ...
## $ loc.21.43: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.21.50: int 0 0 0 0 0 0 0 0 0 0 ...
## [list output truncated]
a.env <- data.frame(Pop=clust.a$grp)
# order both data.sets sequentially by population - this is only important for the naming of individuals
pop.seq <- order(a.env$Pop)
a.env[pop.seq, "Pop"]
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [75] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2
## [112] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [149] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [186] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [223] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [260] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [297] 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [334] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [371] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5
## [408] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [445] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [482] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
## [519] 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
## [556] 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
## [593] 6 6 6 6 6 6 6 6
## Levels: 1 2 3 4 5 6
a.comm <- a.comm[pop.seq, ]
a.env <- a.env[pop.seq, , drop=FALSE]
table(a.env$Pop)
##
## 1 2 3 4 5 6
## 102 97 99 105 99 98
names.new <- paste0("P", a.env$Pop)
names.new2 <- names.new
names.new2[1] <- paste0(names.new[1], ".", 1)
counter <- 1
for (i in 2:length(names.new)) {
if (names.new[i] == names.new[i-1]) {
counter <- counter + 1
}else{
counter <- 1
}
names.new2[i] <- paste0(names.new[i], ".", counter)
}
a.env$Pop.names <- factor(names.new, levels=unique(names.new))
a.env$Ind.names <- names.new2
rownames(a.comm) <- names.new2
rownames(a.env) <- names.new2
BiodiversityR::check.datasets(a.comm, a.env)
## OK
head(a.comm)
## loc.1.03 loc.1.19 loc.2.01 loc.2.04 loc.2.40 loc.2.41 loc.2.44 loc.3.08
## P1.1 2 0 0 0 0 0 2 1
## P1.2 2 0 0 0 1 0 1 1
## P1.3 2 0 0 0 2 0 0 0
## P1.4 2 0 0 0 2 0 0 0
## P1.5 2 0 0 0 1 0 1 0
## P1.6 2 0 1 0 0 0 1 0
## loc.3.10 loc.3.38 loc.3.45 loc.4.22 loc.4.36 loc.4.49 loc.5.04 loc.5.08
## P1.1 0 0 1 0 0 2 0 0
## P1.2 0 0 1 0 0 2 0 0
## P1.3 1 0 1 1 0 1 0 0
## P1.4 0 0 2 0 0 2 0 0
## P1.5 0 0 2 0 0 2 0 0
## P1.6 1 0 1 0 0 2 0 0
## loc.5.15 loc.5.17 loc.5.18 loc.5.30 loc.5.41 loc.5.44 loc.6.14 loc.6.15
## P1.1 0 0 0 0 1 1 2 0
## P1.2 1 0 0 0 1 0 2 0
## P1.3 1 0 0 0 1 0 2 0
## P1.4 1 0 0 0 1 0 2 0
## P1.5 1 0 0 0 1 0 2 0
## P1.6 0 0 0 0 2 0 2 0
## loc.6.33 loc.7.06 loc.7.37 loc.7.40 loc.8.10 loc.8.21 loc.8.33 loc.8.44
## P1.1 0 1 1 0 2 0 0 0
## P1.2 0 0 2 0 1 0 0 0
## P1.3 0 0 2 0 0 0 2 0
## P1.4 0 0 2 0 0 1 0 1
## P1.5 0 0 2 0 0 0 1 1
## P1.6 0 0 2 0 0 0 1 1
## loc.8.48 loc.9.01 loc.9.13 loc.9.24 loc.9.25 loc.9.35 loc.9.40 loc.9.41
## P1.1 0 2 0 0 0 0 0 0
## P1.2 1 2 0 0 0 0 0 0
## P1.3 0 2 0 0 0 0 0 0
## P1.4 0 2 0 0 0 0 0 0
## P1.5 0 2 0 0 0 0 0 0
## P1.6 0 2 0 0 0 0 0 0
## loc.10.03 loc.10.22 loc.10.30 loc.10.37 loc.10.49 loc.11.05 loc.11.11
## P1.1 0 2 0 0 0 0 0
## P1.2 1 1 0 0 0 0 0
## P1.3 0 2 0 0 0 0 0
## P1.4 1 1 0 0 0 0 0
## P1.5 1 1 0 0 0 0 0
## P1.6 1 1 0 0 0 0 0
## loc.11.16 loc.11.21 loc.11.32 loc.11.37 loc.11.39 loc.11.41 loc.12.01
## P1.1 2 0 0 0 0 0 1
## P1.2 0 0 0 0 0 2 0
## P1.3 2 0 0 0 0 0 0
## P1.4 2 0 0 0 0 0 0
## P1.5 2 0 0 0 0 0 0
## P1.6 2 0 0 0 0 0 0
## loc.12.09 loc.12.20 loc.12.21 loc.13.34 loc.13.44 loc.13.47 loc.14.30
## P1.1 0 0 1 2 0 0 2
## P1.2 0 0 2 2 0 0 2
## P1.3 0 2 0 2 0 0 2
## P1.4 0 0 2 2 0 0 2
## P1.5 0 1 1 2 0 0 2
## P1.6 0 1 1 2 0 0 2
## loc.14.48 loc.14.50 loc.15.21 loc.15.30 loc.16.10 loc.16.12 loc.16.14
## P1.1 0 0 1 1 1 0 0
## P1.2 0 0 1 1 1 0 0
## P1.3 0 0 0 2 0 0 0
## P1.4 0 0 1 1 1 0 0
## P1.5 0 0 0 2 1 0 0
## P1.6 0 0 0 2 1 0 0
## loc.16.25 loc.16.34 loc.16.39 loc.17.06 loc.17.23 loc.17.38 loc.17.46
## P1.1 1 0 0 0 0 2 0
## P1.2 1 0 0 1 0 1 0
## P1.3 2 0 0 0 0 2 0
## P1.4 1 0 0 0 0 2 0
## P1.5 0 1 0 0 0 2 0
## P1.6 1 0 0 0 0 2 0
## loc.18.02 loc.18.25 loc.18.49 loc.18.50 loc.19.01 loc.19.13 loc.19.37
## P1.1 2 0 0 0 0 0 0
## P1.2 2 0 0 0 0 2 0
## P1.3 2 0 0 0 0 0 1
## P1.4 2 0 0 0 0 0 0
## P1.5 2 0 0 0 0 1 0
## P1.6 2 0 0 0 0 0 0
## loc.19.43 loc.19.44 loc.19.49 loc.20.11 loc.20.12 loc.20.17 loc.20.20
## P1.1 2 0 0 0 1 0 1
## P1.2 0 0 0 0 0 0 2
## P1.3 1 0 0 0 0 0 2
## P1.4 0 2 0 0 0 0 2
## P1.5 1 0 0 0 0 0 2
## P1.6 0 2 0 0 1 0 1
## loc.20.28 loc.20.34 loc.21.04 loc.21.15 loc.21.17 loc.21.31 loc.21.39
## P1.1 0 0 0 1 0 0 0
## P1.2 0 0 0 0 1 0 0
## P1.3 0 0 0 0 1 1 0
## P1.4 0 0 0 0 0 1 0
## P1.5 0 0 0 0 2 0 0
## P1.6 0 0 0 1 1 0 0
## loc.21.42 loc.21.43 loc.21.50 loc.22.20 loc.22.24 loc.22.49 loc.23.11
## P1.1 1 0 0 0 1 1 1
## P1.2 1 0 0 0 2 0 0
## P1.3 0 0 0 0 2 0 2
## P1.4 1 0 0 1 1 0 0
## P1.5 0 0 0 1 1 0 1
## P1.6 0 0 0 0 1 1 0
## loc.23.25 loc.23.38 loc.23.43 loc.23.46 loc.24.02 loc.24.12 loc.24.43
## P1.1 0 0 0 1 0 0 2
## P1.2 1 0 1 0 1 0 1
## P1.3 0 0 0 0 0 1 1
## P1.4 1 0 1 0 0 1 1
## P1.5 0 0 1 0 0 0 2
## P1.6 2 0 0 0 0 0 2
## loc.25.18 loc.25.23 loc.25.28 loc.25.30 loc.25.34 loc.26.01 loc.26.02
## P1.1 0 0 0 2 0 0 2
## P1.2 0 0 1 1 0 0 2
## P1.3 0 0 0 2 0 0 2
## P1.4 0 0 1 1 0 0 2
## P1.5 0 0 0 2 0 0 1
## P1.6 0 0 0 2 0 0 2
## loc.26.23 loc.26.30 loc.27.03 loc.27.06 loc.27.22 loc.27.24 loc.27.34
## P1.1 0 0 0 0 2 0 0
## P1.2 0 0 1 0 1 0 0
## P1.3 0 0 0 0 2 0 0
## P1.4 0 0 0 0 2 0 0
## P1.5 0 1 0 0 2 0 0
## P1.6 0 0 1 0 1 0 0
## loc.28.07 loc.28.13 loc.28.14 loc.28.20 loc.28.27 loc.28.42 loc.28.47
## P1.1 0 2 0 0 0 0 0
## P1.2 0 1 0 0 0 0 1
## P1.3 0 2 0 0 0 0 0
## P1.4 0 1 0 0 0 0 1
## P1.5 0 1 0 0 1 0 0
## P1.6 0 1 0 0 1 0 0
## loc.29.14 loc.29.17 loc.29.30 loc.29.44 loc.30.26 loc.30.34 loc.30.42
## P1.1 0 0 1 1 0 1 0
## P1.2 0 0 0 2 0 1 0
## P1.3 0 0 1 1 0 2 0
## P1.4 0 0 1 1 0 0 0
## P1.5 1 0 0 1 0 1 0
## P1.6 1 0 0 1 0 2 0
## loc.30.49 loc.30.50
## P1.1 1 0
## P1.2 1 0
## P1.3 0 0
## P1.4 2 0
## P1.5 1 0
## P1.6 0 0
head(a.env)
## Pop Pop.names Ind.names
## P1.1 1 P1 P1.1
## P1.2 1 P1 P1.2
## P1.3 1 P1 P1.3
## P1.4 1 P1 P1.4
## P1.5 1 P1 P1.5
## P1.6 1 P1 P1.6
For data b
b.comm <- data.frame(as.matrix(b$tab))
str(b.comm)
## 'data.frame': 600 obs. of 163 variables:
## $ loc.1.16 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.1.19 : int 1 2 2 2 2 1 1 2 1 2 ...
## $ loc.1.33 : int 1 0 0 0 0 0 1 0 0 0 ...
## $ loc.1.40 : int 0 0 0 0 0 1 0 0 1 0 ...
## $ loc.2.02 : int 2 2 2 1 2 1 2 1 1 1 ...
## $ loc.2.15 : int 0 0 0 1 0 1 0 1 1 1 ...
## $ loc.3.06 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.3.07 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.3.08 : int 2 1 2 2 1 1 1 1 2 1 ...
## $ loc.3.15 : int 0 1 0 0 0 0 0 0 0 0 ...
## $ loc.3.25 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.3.44 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.3.47 : int 0 0 0 0 1 1 1 1 0 1 ...
## $ loc.4.02 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.4.04 : int 0 2 0 0 2 0 1 0 2 2 ...
## $ loc.4.06 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.4.18 : int 2 0 1 0 0 1 1 0 0 0 ...
## $ loc.4.33 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.4.38 : int 0 0 1 2 0 1 0 2 0 0 ...
## $ loc.5.06 : int 0 0 0 0 0 0 0 1 0 1 ...
## $ loc.5.30 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.5.31 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.5.46 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.5.48 : int 2 2 2 2 2 2 2 1 2 1 ...
## $ loc.6.01 : int 0 0 1 0 0 0 0 0 0 1 ...
## $ loc.6.09 : int 2 2 1 2 2 2 2 2 1 1 ...
## $ loc.6.39 : int 0 0 0 0 0 0 0 0 1 0 ...
## $ loc.6.41 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.7.09 : int 2 2 1 2 1 2 2 1 2 2 ...
## $ loc.7.16 : int 0 0 1 0 1 0 0 1 0 0 ...
## $ loc.7.20 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.7.22 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.7.48 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.8.01 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.8.02 : int 1 1 0 2 0 0 1 1 1 0 ...
## $ loc.8.15 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.8.19 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.8.22 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.8.40 : int 1 1 2 0 2 2 1 1 1 2 ...
## $ loc.9.01 : int 1 0 2 1 1 0 1 0 0 0 ...
## $ loc.9.09 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.9.13 : int 0 2 0 0 0 1 0 0 1 0 ...
## $ loc.9.23 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.9.24 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.9.31 : int 1 0 0 0 0 0 1 1 0 1 ...
## $ loc.9.40 : int 0 0 0 1 1 1 0 1 1 1 ...
## $ loc.10.03: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.10.14: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.10.41: int 1 2 1 2 2 2 2 1 2 1 ...
## $ loc.10.46: int 0 0 0 0 0 0 0 1 0 0 ...
## $ loc.10.49: int 1 0 1 0 0 0 0 0 0 1 ...
## $ loc.10.50: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.11.02: int 0 1 1 0 0 1 0 1 2 1 ...
## $ loc.11.12: int 1 0 0 2 2 1 2 0 0 0 ...
## $ loc.11.13: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.11.21: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.11.23: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.11.25: int 0 1 1 0 0 0 0 1 0 0 ...
## $ loc.11.29: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.11.35: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.11.46: int 1 0 0 0 0 0 0 0 0 1 ...
## $ loc.11.50: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.12.03: int 1 0 1 0 1 2 2 1 0 1 ...
## $ loc.12.18: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.12.19: int 0 0 1 1 0 0 0 0 1 0 ...
## $ loc.12.26: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.12.35: int 1 0 0 0 0 0 0 1 0 0 ...
## $ loc.12.47: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.12.48: int 0 2 0 1 1 0 0 0 1 1 ...
## $ loc.12.50: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.13.10: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.13.11: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.13.13: int 0 1 0 0 1 1 0 0 0 0 ...
## $ loc.13.26: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.13.27: int 2 1 2 2 1 1 2 2 2 2 ...
## $ loc.13.38: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.14.06: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.14.09: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.14.14: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.14.30: int 2 2 2 2 2 2 2 2 2 2 ...
## $ loc.15.15: int 0 0 1 1 0 0 0 0 0 1 ...
## $ loc.15.37: int 2 2 0 1 2 2 2 2 2 1 ...
## $ loc.15.41: int 0 0 1 0 0 0 0 0 0 0 ...
## $ loc.15.46: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.16.02: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.16.04: int 2 2 2 2 2 1 1 2 2 2 ...
## $ loc.16.07: int 0 0 0 0 0 1 1 0 0 0 ...
## $ loc.16.11: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.16.37: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.17.16: int 1 0 0 0 0 0 0 2 0 2 ...
## $ loc.17.19: int 1 1 2 2 1 2 2 0 2 0 ...
## $ loc.17.38: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.17.50: int 0 1 0 0 1 0 0 0 0 0 ...
## $ loc.18.03: int 1 0 2 1 2 1 0 2 2 1 ...
## $ loc.18.09: int 1 0 0 0 0 0 0 0 0 0 ...
## $ loc.18.32: int 0 0 0 0 0 0 0 0 0 1 ...
## $ loc.18.49: int 0 2 0 1 0 1 2 0 0 0 ...
## $ loc.19.01: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.19.09: int 0 1 1 0 0 1 0 0 0 0 ...
## [list output truncated]
b.env <- data.frame(Pop=clust.b$grp)
# order both data.sets sequentially by population - this is only important for the naming of individuals
pop.seq <- order(b.env$Pop)
b.env[pop.seq, "Pop"]
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [75] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2
## [112] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [149] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [186] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [223] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [260] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [297] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4
## [334] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [371] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5
## [408] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [445] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [482] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
## [519] 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
## [556] 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
## [593] 6 6 6 6 6 6 6 6
## Levels: 1 2 3 4 5 6
b.comm <- b.comm[pop.seq, ]
b.env <- b.env[pop.seq, , drop=FALSE]
table(b.env$Pop)
##
## 1 2 3 4 5 6
## 108 100 120 72 99 101
names.new <- paste0("P", b.env$Pop)
names.new2 <- names.new
names.new2[1] <- paste0(names.new[1], ".", 1)
counter <- 1
for (i in 2:length(names.new)) {
if (names.new[i] == names.new[i-1]) {
counter <- counter + 1
}else{
counter <- 1
}
names.new2[i] <- paste0(names.new[i], ".", counter)
}
b.env$Pop.names <- factor(names.new, levels=unique(names.new))
b.env$Ind.names <- names.new2
rownames(b.comm) <- names.new2
rownames(b.env) <- names.new2
BiodiversityR::check.datasets(b.comm, b.env)
## OK
For data c
c.comm <- data.frame(as.matrix(c$tab))
str(c.comm)
## 'data.frame': 600 obs. of 160 variables:
## $ loc.1.03 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.1.34 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.1.45 : int 2 2 2 2 2 2 2 2 2 2 ...
## $ loc.2.01 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.2.07 : int 2 2 2 2 2 2 2 2 2 2 ...
## $ loc.2.10 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.2.25 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.2.33 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.2.48 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.2.50 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.3.04 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.3.05 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.3.07 : int 1 1 1 1 1 2 2 1 0 1 ...
## $ loc.3.20 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.3.22 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.3.37 : int 0 0 0 0 0 0 0 0 0 1 ...
## $ loc.3.41 : int 1 1 1 1 1 0 0 1 2 0 ...
## $ loc.3.46 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.4.04 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.4.08 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.4.41 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.4.46 : int 2 2 2 2 2 2 2 2 2 2 ...
## $ loc.4.48 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.5.01 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.5.11 : int 0 1 0 0 0 1 0 1 1 0 ...
## $ loc.5.12 : int 1 0 0 0 0 1 0 1 0 1 ...
## $ loc.5.14 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.5.42 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.5.46 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.5.47 : int 1 1 2 2 2 0 2 0 1 1 ...
## $ loc.6.16 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.6.25 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.6.28 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.6.29 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.6.43 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.6.45 : int 2 2 2 2 2 2 2 2 2 2 ...
## $ loc.7.05 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.7.08 : int 2 2 2 2 2 2 2 2 2 2 ...
## $ loc.7.13 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.7.24 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.7.25 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.7.30 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.7.36 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.7.47 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.8.03 : int 0 0 0 0 1 0 1 0 0 1 ...
## $ loc.8.14 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.8.26 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.8.33 : int 1 2 2 1 1 1 1 1 2 1 ...
## $ loc.8.35 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.8.46 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.8.48 : int 1 0 0 1 0 1 0 1 0 0 ...
## $ loc.9.05 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.9.14 : int 1 1 0 0 0 0 0 0 0 1 ...
## $ loc.9.19 : int 1 1 2 2 2 2 2 2 2 1 ...
## $ loc.9.23 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.10.04: int 0 0 1 0 0 1 1 0 0 1 ...
## $ loc.10.15: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.10.20: int 2 2 1 2 2 1 1 2 2 1 ...
## $ loc.10.22: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.10.27: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.10.34: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.10.37: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.11.19: int 1 1 2 2 1 0 1 1 2 1 ...
## $ loc.11.30: int 1 1 0 0 1 2 1 1 0 1 ...
## $ loc.11.46: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.12.01: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.12.04: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.12.11: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.12.18: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.12.25: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.12.38: int 2 2 2 2 2 2 2 2 2 2 ...
## $ loc.12.41: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.13.10: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.13.22: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.13.24: int 0 2 2 0 1 2 2 1 1 1 ...
## $ loc.13.28: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.13.34: int 2 0 0 2 1 0 0 1 1 1 ...
## $ loc.14.16: int 2 1 2 1 2 2 1 1 1 2 ...
## $ loc.14.18: int 0 1 0 1 0 0 1 1 1 0 ...
## $ loc.14.25: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.14.27: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.14.36: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.15.15: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.15.22: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.15.26: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.15.33: int 2 2 2 2 2 2 2 2 2 2 ...
## $ loc.16.02: int 0 2 2 1 1 2 0 0 0 2 ...
## $ loc.16.31: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.16.32: int 0 0 0 0 0 0 0 1 0 0 ...
## $ loc.16.41: int 2 0 0 1 1 0 2 1 2 0 ...
## $ loc.16.45: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.17.07: int 2 2 2 2 2 2 2 2 2 2 ...
## $ loc.17.08: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.17.20: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.17.32: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.18.08: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.18.15: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.18.19: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.18.28: int 0 0 0 0 0 0 0 0 0 0 ...
## [list output truncated]
c.env <- data.frame(Pop=clust.c$grp)
# order both data.sets sequentially by population - this is only important for the naming of individuals
pop.seq <- order(c.env$Pop)
c.env[pop.seq, "Pop"]
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [26] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2
## [51] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [76] 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [101] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4
## [126] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [151] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5
## [176] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [201] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6
## [226] 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
## [251] 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
## [276] 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
## [301] 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7
## [326] 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
## [351] 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
## [376] 7 7 7 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8
## [401] 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8
## [426] 8 8 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9
## [451] 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9
## [476] 9 9 9 9 9 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
## [501] 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
## [526] 10 10 10 10 10 10 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11
## [551] 11 11 11 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12
## [576] 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12
## Levels: 1 2 3 4 5 6 7 8 9 10 11 12
c.comm <- c.comm[pop.seq, ]
c.env <- c.env[pop.seq, , drop=FALSE]
table(c.env$Pop)
##
## 1 2 3 4 5 6 7 8 9 10 11 12
## 47 31 45 48 50 102 55 49 53 51 22 47
names.new <- paste0("P", c.env$Pop)
names.new2 <- names.new
names.new2[1] <- paste0(names.new[1], ".", 1)
counter <- 1
for (i in 2:length(names.new)) {
if (names.new[i] == names.new[i-1]) {
counter <- counter + 1
}else{
counter <- 1
}
names.new2[i] <- paste0(names.new[i], ".", counter)
}
c.env$Pop.names <- factor(names.new, levels=unique(names.new))
c.env$Ind.names <- names.new2
rownames(c.comm) <- names.new2
rownames(c.env) <- names.new2
BiodiversityR::check.datasets(c.comm, c.env)
## OK
For data d
d.comm <- data.frame(as.matrix(d$tab))
str(d.comm)
## 'data.frame': 600 obs. of 185 variables:
## $ loc.1.11 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.1.15 : int 2 2 2 2 2 2 2 2 2 2 ...
## $ loc.1.36 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.2.18 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.2.20 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.2.31 : int 2 2 2 2 2 2 2 2 2 2 ...
## $ loc.2.44 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.2.50 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.3.05 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.3.19 : int 2 2 0 0 2 1 2 1 0 2 ...
## $ loc.3.27 : int 0 0 0 0 0 1 0 0 1 0 ...
## $ loc.3.33 : int 0 0 2 2 0 0 0 1 1 0 ...
## $ loc.3.45 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.4.08 : int 2 2 2 2 2 2 2 2 2 2 ...
## $ loc.4.16 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.4.17 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.4.18 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.4.21 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.4.34 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.4.36 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.4.37 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.4.39 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.4.40 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.5.12 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.5.14 : int 2 2 2 2 2 2 2 2 2 2 ...
## $ loc.5.35 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.5.39 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.5.42 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.6.05 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.6.13 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.6.18 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.6.20 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.6.21 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.6.36 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.6.41 : int 2 2 2 2 2 2 2 2 2 2 ...
## $ loc.7.02 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.7.09 : int 1 2 1 0 0 1 0 0 0 0 ...
## $ loc.7.27 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.7.28 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.7.31 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.7.35 : int 1 0 1 2 2 1 2 2 2 2 ...
## $ loc.7.37 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.8.28 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.8.32 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.8.34 : int 2 2 2 2 2 2 2 2 2 2 ...
## $ loc.8.40 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.8.44 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.9.02 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.9.05 : int 2 2 2 2 2 2 2 2 2 2 ...
## $ loc.9.06 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.9.15 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.9.17 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.9.29 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.9.42 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.10.11: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.10.23: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.10.35: int 2 2 2 2 2 2 2 2 2 2 ...
## $ loc.10.41: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.10.44: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.10.49: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.11.01: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.11.15: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.11.34: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.11.35: int 2 2 2 2 2 2 2 2 2 2 ...
## $ loc.11.45: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.11.46: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.12.03: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.12.04: int 2 2 2 2 2 2 2 2 2 2 ...
## $ loc.12.08: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.12.11: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.12.12: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.12.17: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.12.30: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.12.35: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.12.45: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.13.03: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.13.10: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.13.12: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.13.20: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.13.29: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.13.35: int 1 1 2 1 1 0 1 1 2 1 ...
## $ loc.13.46: int 1 1 0 1 1 2 1 1 0 1 ...
## $ loc.14.02: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.14.03: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.14.13: int 2 2 2 2 2 2 2 2 2 2 ...
## $ loc.14.14: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.14.21: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.14.36: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.14.49: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.15.16: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.15.34: int 2 2 2 2 2 2 2 2 2 2 ...
## $ loc.15.40: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.15.45: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.15.46: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.16.03: int 2 2 2 2 2 2 2 2 2 2 ...
## $ loc.16.04: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.16.06: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.16.19: int 0 0 0 0 0 0 0 0 0 0 ...
## $ loc.16.37: int 0 0 0 0 0 0 0 0 0 0 ...
## [list output truncated]
d.env <- data.frame(Pop=clust.d$grp)
# order both data.sets sequentially by population - this is only important for the naming of individuals
pop.seq <- order(d.env$Pop)
d.env[pop.seq, "Pop"]
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [26] 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2
## [51] 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [76] 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5
## [101] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6
## [126] 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7
## [151] 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 8 8
## [176] 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 9
## [201] 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 10 10 10
## [226] 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 11
## [251] 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 12 12 12 12 12
## [276] 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 13 13 13 13 13 13 13 13 13
## [301] 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13
## [326] 13 13 13 13 13 13 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 15 15
## [351] 15 15 15 15 15 15 15 15 15 15 15 15 15 15 15 15 15 15 15 15 15 15 16 16 16
## [376] 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16
## [401] 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 17 17
## [426] 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 18 18 18 18 18 18 18
## [451] 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 19 19 19 19 19 19
## [476] 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 20 20 20 20 20 20
## [501] 20 20 20 20 20 20 20 20 20 20 20 21 21 21 21 21 21 21 21 21 21 21 21 21 21
## [526] 21 21 21 21 21 21 21 21 21 21 21 21 22 22 22 22 22 22 22 22 22 22 22 22 22
## [551] 22 22 22 22 22 22 22 22 22 22 22 22 22 23 23 23 23 23 23 23 23 23 23 24 24
## [576] 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
## Levels: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
d.comm <- d.comm[pop.seq, ]
d.env <- d.env[pop.seq, , drop=FALSE]
table(d.env$Pop)
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
## 38 23 24 12 25 23 28 26 23 27 21 21 40 17 24 51 20 26 25 17 26 26 10 27
names.new <- paste0("P", d.env$Pop)
names.new2 <- names.new
names.new2[1] <- paste0(names.new[1], ".", 1)
counter <- 1
for (i in 2:length(names.new)) {
if (names.new[i] == names.new[i-1]) {
counter <- counter + 1
}else{
counter <- 1
}
names.new2[i] <- paste0(names.new[i], ".", counter)
}
d.env$Pop.names <- factor(names.new, levels=unique(names.new))
d.env$Ind.names <- names.new2
rownames(d.comm) <- names.new2
rownames(d.env) <- names.new2
BiodiversityR::check.datasets(d.comm, d.env)
## OK
Redundancy analysis is implemented by vegan::rda. This method combines methods of linear regression analysis with methods of principal components analysis (for a thorough treatment, see Legendre and Legendre 2012 starting from page 629) (note further that Principal Components Analysis - PCA - is also done in vegan via the rda function, i.e. by creating a null model through formulae with ‘~1’ as explanatory variables similar to regression models). With redundancy analysis it is not necessary to do a prior step of PCA to reduce the number of variables. In fact, I would recommend against such reduction as theoretically less variation would be explained by the model.
What could also be an advantage of using Redundancy Analysis instead of Discriminant Analysis of Principal Components is that several explanatory variables can be used, which allows expansion of the Redundancy Analysis framework to partial regression (see vegan::rda.formula) and variance partitioning techniques (see vegan::varpart).
As I showed in a previous publication, results from Redundancy Analysis can also be directly understood in terms of Sums-of-Squares that are used in Analysis of Molecular Variance.
After preparing the data sets, fitting the model is straightforward:
a.rda <- rda(a.comm ~ Pop.names, data=a.env)
a.rda
## Call: rda(formula = a.comm ~ Pop.names, data = a.env)
##
## Inertia Proportion Rank
## Total 32.1670 1.0000
## Constrained 9.5014 0.2954 5
## Unconstrained 22.6656 0.7046 109
## Inertia is variance
##
## Eigenvalues for constrained axes:
## RDA1 RDA2 RDA3 RDA4 RDA5
## 2.9680 2.5149 1.4537 1.4032 1.1616
##
## Eigenvalues for unconstrained axes:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8
## 1.0168 0.9382 0.9067 0.8690 0.7937 0.7731 0.7535 0.7084
## (Showing 8 of 109 unconstrained eigenvalues)
# summary(a.rda) # long output with all the 'sites' and 'species' scores
anova(a.rda, permutations=9999)
## Permutation test for rda under reduced model
## Permutation: free
## Number of permutations: 9999
##
## Model: rda(formula = a.comm ~ Pop.names, data = a.env)
## Df Variance F Pr(>F)
## Model 5 9.5014 49.801 1e-04 ***
## Residual 594 22.6656
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Plotting can be done with ggplot2 as shown in the AMOVA document or in this document.
attach(a.env)
plot.a <- ordiplot(a.rda, choices=c(1,2))
Pop.ellipses <- ordiellipse(plot.a, groups=Pop.names, display="sites", kind="sd")
Modify the ‘theme’ of ggplot2
BioR.theme <- theme(
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid = element_blank(),
axis.line = element_line("gray25"),
text = element_text(size = 12),
axis.text = element_text(size = 10, colour = "gray25"),
axis.title = element_text(size = 14, colour = "gray25"),
legend.title = element_text(size = 14),
legend.text = element_text(size = 14),
legend.key = element_blank())
sites1 <- sites.long(plot.a, env.data=a.env)
axis1 <- axis.long(a.rda, choices=c(1, 2))
centroids.Pop1 <- centroids.long(sites1, grouping=Pop.names, centroids.only=TRUE)
centroids.Pop2 <- centroids.long(sites1, grouping=Pop.names, centroids.only=FALSE)
Pop.ellipses.long2 <- ordiellipse.long(Pop.ellipses, grouping.name="Pop.names")
plotgg.a <- ggplot() +
geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +
xlab(axis1[1, "label"]) +
ylab(axis1[2, "label"]) +
scale_x_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
scale_y_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
geom_polygon(data=Pop.ellipses.long2,
aes(x=axis1, y=axis2, colour=Pop.names,
fill=after_scale(alpha(colour, 0.2))),
size=0.2, show.legend=TRUE) +
geom_segment(data=centroids.Pop2,
aes(x=axis1c, y=axis2c, xend=axis1, yend=axis2, colour=Pop.names),
size=0.7, show.legend=FALSE) +
geom_point(data=centroids.Pop1,
aes(x=axis1c, y=axis2c),
shape="square", colour="black", alpha=0.7, size=2) +
geom_label_repel(data=centroids.Pop1,
aes(x=axis1c, y=axis2c, label=Pop.names, colour=Pop.names),
alpha=1.0, size=2, show.legend=FALSE) +
labs(colour = "Population") +
BioR.theme +
theme(legend.text = element_text(size = 10)) +
scale_colour_npg() +
coord_fixed(ratio=1)
plotgg.a
It is also possible to add information on alleles as vectors, using similar methods as adding vectors to ordination plots. The only vectors that are added or those that explain 40% of variation or more.
allele.envfit <- envfit(plot.a, env=a.comm, permutations=99)
allele.data.envfit <- data.frame(r=allele.envfit$vectors$r, p=allele.envfit$vectors$pvals)
allele.long2 <- species.long(plot.a, spec.data=allele.data.envfit)
allele.long3 <- allele.long2[allele.long2$r >= 0.4, ]
allele.long3
## r p axis1 axis2 labels
## loc.4.22 0.6065750 0.01 0.06991562 -1.1414840 loc.4.22
## loc.4.49 0.6056847 0.01 -0.05884754 1.1343234 loc.4.49
## loc.11.16 0.6354804 0.01 0.79376353 0.8399779 loc.11.16
## loc.16.10 0.4047873 0.01 0.36961633 0.6112023 loc.16.10
## loc.19.37 0.4820009 0.01 0.62369116 -0.6794746 loc.19.37
## loc.19.43 0.4120352 0.01 -0.77607706 0.4866709 loc.19.43
## loc.30.34 0.4965732 0.01 0.93920579 -0.2832212 loc.30.34
## loc.30.49 0.4969482 0.01 -0.98287559 0.2054791 loc.30.49
plotgg.a <- ggplot() +
geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +
xlab(axis1[1, "label"]) +
ylab(axis1[2, "label"]) +
scale_x_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
scale_y_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
geom_polygon(data=Pop.ellipses.long2,
aes(x=axis1, y=axis2, colour=Pop.names,
fill=after_scale(alpha(colour, 0.2))),
size=0.2, show.legend=TRUE) +
geom_segment(data=centroids.Pop2,
aes(x=axis1c, y=axis2c, xend=axis1, yend=axis2, colour=Pop.names),
size=0.7, show.legend=FALSE) +
geom_point(data=centroids.Pop1,
aes(x=axis1c, y=axis2c),
shape="square", colour="black", alpha=0.7, size=2) +
geom_segment(data=allele.long3,
aes(x=0, y=0, xend=axis1*1.2, yend=axis2*1.2),
colour="red", size=0.9, arrow=arrow()) +
geom_text_repel(data=allele.long3,
aes(x=axis1*1.3, y=axis2*1.3, label=labels),
colour="red") +
labs(colour = "Population") +
BioR.theme +
theme(legend.text = element_text(size = 10)) +
scale_colour_npg() +
coord_fixed(ratio=1)
plotgg.a
Rather than plotting alleles as vectors, a smoothed surface can be added
attach(a.comm)
allele.long3[which.max(allele.long3$p), "labels"]
## [1] "loc.4.22"
allele.surface <- ordisurf(plot.a, y=loc.4.22)
allele.grid <- ordisurfgrid.long(allele.surface)
plotgg.a <- ggplot() +
geom_contour_filled(data=allele.grid,
aes(x=x, y=y, z=z)) +
geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +
xlab(axis1[1, "label"]) +
ylab(axis1[2, "label"]) +
scale_x_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
scale_y_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
geom_polygon(data=Pop.ellipses.long2,
aes(x=axis1, y=axis2, colour=Pop.names,
fill=after_scale(alpha(colour, 0.2))),
size=0.2, show.legend=FALSE) +
geom_segment(data=centroids.Pop2,
aes(x=axis1c, y=axis2c, xend=axis1, yend=axis2, colour=Pop.names),
size=0.7, show.legend=FALSE) +
geom_point(data=centroids.Pop1,
aes(x=axis1c, y=axis2c),
shape="square", colour="black", alpha=0.7, size=2) +
geom_label_repel(data=centroids.Pop1,
aes(x=axis1c, y=axis2c, label=Pop.names, colour=Pop.names),
alpha=1.0, size=2, show.legend=FALSE) +
labs(fill = "loc.4.22") +
BioR.theme +
theme(legend.text = element_text(size = 10)) +
scale_colour_npg() +
coord_fixed(ratio=1)
plotgg.a
## Warning: Removed 150 rows containing non-finite values (stat_contour_filled).
Very similar scripts can be used as before.
b.rda <- rda(b.comm ~ Pop.names, data=b.env)
b.rda
## Call: rda(formula = b.comm ~ Pop.names, data = b.env)
##
## Inertia Proportion Rank
## Total 34.3125 1.0000
## Constrained 5.3641 0.1563 5
## Unconstrained 28.9484 0.8437 133
## Inertia is variance
##
## Eigenvalues for constrained axes:
## RDA1 RDA2 RDA3 RDA4 RDA5
## 1.8781 1.3828 0.8607 0.7436 0.4989
##
## Eigenvalues for unconstrained axes:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8
## 1.0847 0.9884 0.9435 0.8992 0.8856 0.8474 0.8253 0.7804
## (Showing 8 of 133 unconstrained eigenvalues)
# summary(b.rda) # long output with all the 'sites' and 'species' scores
anova(b.rda, permutations=9999)
## Permutation test for rda under reduced model
## Permutation: free
## Number of permutations: 9999
##
## Model: rda(formula = b.comm ~ Pop.names, data = b.env)
## Df Variance F Pr(>F)
## Model 5 5.3641 22.014 1e-04 ***
## Residual 594 28.9484
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Plotting the results
attach(b.env)
## The following objects are masked from a.env:
##
## Ind.names, Pop, Pop.names
plot.b <- ordiplot(b.rda, choices=c(1,2))
Pop.ellipses <- ordiellipse(plot.b, groups=Pop.names, display="sites", kind="sd")
sites1 <- sites.long(plot.b, env.data=b.env)
axis1 <- axis.long(b.rda, choices=c(1, 2))
centroids.Pop1 <- centroids.long(sites1, grouping=Pop.names, centroids.only=TRUE)
centroids.Pop2 <- centroids.long(sites1, grouping=Pop.names, centroids.only=FALSE)
Pop.ellipses.long2 <- ordiellipse.long(Pop.ellipses, grouping.name="Pop.names")
plotgg.b <- ggplot() +
geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +
xlab(axis1[1, "label"]) +
ylab(axis1[2, "label"]) +
scale_x_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
scale_y_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
geom_polygon(data=Pop.ellipses.long2,
aes(x=axis1, y=axis2, colour=Pop.names,
fill=after_scale(alpha(colour, 0.2))),
size=0.2, show.legend=TRUE) +
geom_segment(data=centroids.Pop2,
aes(x=axis1c, y=axis2c, xend=axis1, yend=axis2, colour=Pop.names),
size=0.7, show.legend=FALSE) +
geom_point(data=centroids.Pop1,
aes(x=axis1c, y=axis2c),
shape="square", colour="black", alpha=0.7, size=2) +
geom_label_repel(data=centroids.Pop1,
aes(x=axis1c, y=axis2c, label=Pop.names, colour=Pop.names),
alpha=1.0, size=2, show.legend=FALSE) +
labs(colour = "Population") +
BioR.theme +
theme(legend.text = element_text(size = 10)) +
scale_colour_npg() +
coord_fixed(ratio=1)
plotgg.b
For obvious reasons, the procedure is similar.
c.rda <- rda(c.comm ~ Pop.names, data=c.env)
c.rda
## Call: rda(formula = c.comm ~ Pop.names, data = c.env)
##
## Inertia Proportion Rank
## Total 59.326 1.000
## Constrained 41.822 0.705 11
## Unconstrained 17.504 0.295 130
## Inertia is variance
##
## Eigenvalues for constrained axes:
## RDA1 RDA2 RDA3 RDA4 RDA5 RDA6 RDA7 RDA8 RDA9 RDA10 RDA11
## 27.354 5.119 4.163 1.535 1.244 0.711 0.479 0.392 0.366 0.343 0.116
##
## Eigenvalues for unconstrained axes:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8
## 0.7112 0.6125 0.5835 0.5291 0.4937 0.4886 0.4547 0.4376
## (Showing 8 of 130 unconstrained eigenvalues)
# summary(c.rda) # long output with all the 'sites' and 'species' scores
anova(c.rda, permutations=9999)
## Permutation test for rda under reduced model
## Permutation: free
## Number of permutations: 9999
##
## Model: rda(formula = c.comm ~ Pop.names, data = c.env)
## Df Variance F Pr(>F)
## Model 11 41.822 127.72 1e-04 ***
## Residual 588 17.504
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Plotting the results
attach(c.env)
## The following objects are masked from b.env:
##
## Ind.names, Pop, Pop.names
## The following objects are masked from a.env:
##
## Ind.names, Pop, Pop.names
plot.c <- ordiplot(c.rda, choices=c(1,2))
Pop.ellipses <- ordiellipse(plot.c, groups=Pop.names, display="sites", kind="sd")
sites1 <- sites.long(plot.c, env.data=c.env)
axis1 <- axis.long(c.rda, choices=c(1, 2))
centroids.Pop1 <- centroids.long(sites1, grouping=Pop.names, centroids.only=TRUE)
centroids.Pop2 <- centroids.long(sites1, grouping=Pop.names, centroids.only=FALSE)
Pop.ellipses.long2 <- ordiellipse.long(Pop.ellipses, grouping.name="Pop.names")
plotgg.c <- ggplot() +
geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +
xlab(axis1[1, "label"]) +
ylab(axis1[2, "label"]) +
scale_x_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
scale_y_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
geom_polygon(data=Pop.ellipses.long2,
aes(x=axis1, y=axis2, colour=Pop.names,
fill=after_scale(alpha(colour, 0.2))),
size=0.2, show.legend=TRUE) +
geom_segment(data=centroids.Pop2,
aes(x=axis1c, y=axis2c, xend=axis1, yend=axis2, colour=Pop.names),
size=0.7, show.legend=FALSE) +
geom_point(data=centroids.Pop1,
aes(x=axis1c, y=axis2c),
shape="square", colour="black", alpha=0.7, size=2) +
geom_label_repel(data=centroids.Pop1,
aes(x=axis1c, y=axis2c, label=Pop.names, colour=Pop.names),
alpha=1.0, size=2, show.legend=FALSE) +
labs(colour = "Population") +
BioR.theme +
theme(legend.text = element_text(size = 10)) +
scale_colour_manual(values=colorspace::rainbow_hcl(length(unique(Pop.names)), c=90, l=50)) +
coord_fixed(ratio=1)
plotgg.c
For obvious reasons, the procedure is similar.
d.rda <- rda(d.comm ~ Pop.names, data=d.env)
d.rda
## Call: rda(formula = d.comm ~ Pop.names, data = d.env)
##
## Inertia Proportion Rank
## Total 64.0772 1.0000
## Constrained 47.9669 0.7486 23
## Unconstrained 16.1103 0.2514 154
## Inertia is variance
##
## Eigenvalues for constrained axes:
## RDA1 RDA2 RDA3 RDA4 RDA5 RDA6 RDA7 RDA8 RDA9 RDA10 RDA11
## 23.225 10.411 3.851 2.146 1.545 1.241 0.808 0.729 0.685 0.595 0.468
## RDA12 RDA13 RDA14 RDA15 RDA16 RDA17 RDA18 RDA19 RDA20 RDA21 RDA22
## 0.411 0.371 0.326 0.229 0.212 0.196 0.136 0.100 0.097 0.077 0.064
## RDA23
## 0.045
##
## Eigenvalues for unconstrained axes:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8
## 0.5631 0.5132 0.4819 0.4751 0.4525 0.4350 0.4229 0.3744
## (Showing 8 of 154 unconstrained eigenvalues)
# summary(d.rda) # long output with all the 'sites' and 'species' scores
anova(d.rda, permutations=9999)
## Permutation test for rda under reduced model
## Permutation: free
## Number of permutations: 9999
##
## Model: rda(formula = d.comm ~ Pop.names, data = d.env)
## Df Variance F Pr(>F)
## Model 23 47.967 74.565 1e-04 ***
## Residual 576 16.110
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Plotting the results
attach(d.env)
## The following objects are masked from c.env:
##
## Ind.names, Pop, Pop.names
## The following objects are masked from b.env:
##
## Ind.names, Pop, Pop.names
## The following objects are masked from a.env:
##
## Ind.names, Pop, Pop.names
plot.d <- ordiplot(d.rda, choices=c(1,2))
Pop.ellipses <- ordiellipse(plot.d, groups=Pop.names, display="sites", kind="sd")
sites1 <- sites.long(plot.d, env.data=d.env)
axis1 <- axis.long(d.rda, choices=c(1, 2))
centroids.Pop1 <- centroids.long(sites1, grouping=Pop.names, centroids.only=TRUE)
centroids.Pop2 <- centroids.long(sites1, grouping=Pop.names, centroids.only=FALSE)
Pop.ellipses.long2 <- ordiellipse.long(Pop.ellipses, grouping.name="Pop.names")
plotgg.d <- ggplot() +
geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +
xlab(axis1[1, "label"]) +
ylab(axis1[2, "label"]) +
scale_x_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
scale_y_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
geom_polygon(data=Pop.ellipses.long2,
aes(x=axis1, y=axis2, colour=Pop.names,
fill=after_scale(alpha(colour, 0.2))),
size=0.2, show.legend=TRUE) +
geom_segment(data=centroids.Pop2,
aes(x=axis1c, y=axis2c, xend=axis1, yend=axis2, colour=Pop.names),
size=0.7, show.legend=FALSE) +
geom_point(data=centroids.Pop1,
aes(x=axis1c, y=axis2c),
shape="square", colour="black", alpha=0.7, size=2) +
geom_label_repel(data=centroids.Pop1,
aes(x=axis1c, y=axis2c, label=Pop.names, colour=Pop.names),
alpha=1.0, size=2, show.legend=FALSE) +
labs(colour = "Population") +
BioR.theme +
theme(legend.text = element_text(size = 10)) +
scale_colour_manual(values=colorspace::rainbow_hcl(length(unique(Pop.names)), c=90, l=50)) +
coord_fixed(ratio=1)
plotgg.d
In a typical workflow, when researchers would obtain the ordination configurations related to the hierarchical island model with 6 populations (3, 2, 1), they would expect that there indeed are two hierarchical levels. It is possible to investigate that hierarchical level further in an analysis pathway that is similar to initial steps in the Analysis of Molecular Variance.
The earlier ordination process showed that populations P1, P2 and P5 cluster together in the ordination graph. Similarly, populations P3 and P4 also clustered together.
A higher level is created that captures this clustering structure.
b.env$Metapop <- character(length=nrow(b.env))
b.env[b.env$Pop.names %in% c("P1", "P2", "P5"), "Metapop"] <- "M1"
b.env[b.env$Pop.names %in% c("P3", "P4"), "Metapop"] <- "M2"
b.env[b.env$Pop.names == "P6", "Metapop"] <- "M3"
b.env$Metapop <- factor(b.env$Metapop, levels=sort(unique(b.env$Metapop)))
Now and similar to the AMOVA procedure, both metapopulation and population levels are used as explanatory variables for the Redundancy Analysis.
b.rda <- rda(b.comm ~ Metapop + Pop.names, data=b.env)
b.rda
## Call: rda(formula = b.comm ~ Metapop + Pop.names, data = b.env)
##
## Inertia Proportion Rank
## Total 34.3125 1.0000
## Constrained 5.3641 0.1563 5
## Unconstrained 28.9484 0.8437 133
## Inertia is variance
## Some constraints were aliased because they were collinear (redundant)
##
## Eigenvalues for constrained axes:
## RDA1 RDA2 RDA3 RDA4 RDA5
## 1.8781 1.3828 0.8607 0.7436 0.4989
##
## Eigenvalues for unconstrained axes:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8
## 1.0847 0.9884 0.9435 0.8992 0.8856 0.8474 0.8253 0.7804
## (Showing 8 of 133 unconstrained eigenvalues)
# summary(d.rda) # long output with all the 'sites' and 'species' scores
anova(b.rda, by="terms", permutations=9999)
## Permutation test for rda under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## Model: rda(formula = b.comm ~ Metapop + Pop.names, data = b.env)
## Df Variance F Pr(>F)
## Metapop 2 3.1248 32.059 1e-04 ***
## Pop.names 3 2.2394 15.317 1e-04 ***
## Residual 594 28.9484
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The significance test shows that both the metapopulation and population level are significant. As explained in the earlier document on AMOVA, this is suspicious with 6 populations in total. A significance test that uses appropriate permutation methods for nested designs is available via: BiodiversityR::nested.npmanova.
nested.npmanova(b.comm ~ Metapop + Pop, data = b.env, method="euclidean", permutations = 999)
## Total sum of squares of distance matrix: 20553.19
## Total sum of squares for non-parametric manova: 20553.1866666667
##
## Nested anova for Pop nested within Metapop
##
## Df SumsofSquares F N.Perm Pr(>F)
## Metapop 2 1871.7 2.093 999 0.014 *
## Pop 3 1341.4 15.317 999 0.001 ***
## Residuals 594 17340.1 29.192
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The new results can also be plotted:
attach(b.env)
## The following objects are masked from d.env:
##
## Ind.names, Pop, Pop.names
## The following objects are masked from c.env:
##
## Ind.names, Pop, Pop.names
## The following objects are masked from b.env (pos = 5):
##
## Ind.names, Pop, Pop.names
## The following objects are masked from a.env:
##
## Ind.names, Pop, Pop.names
plot.e <- ordiplot(b.rda, choices=c(1,2))
Pop.ellipses <- ordiellipse(plot.e, groups=Pop.names, display="sites", kind="sd")
sites1 <- sites.long(plot.e, env.data=b.env)
axis1 <- axis.long(b.rda, choices=c(1, 2))
centroids.Pop1 <- centroids.long(sites1, grouping=Pop.names, centroids.only=TRUE)
centroids.Pop2 <- centroids.long(sites1, grouping=Pop.names, centroids.only=FALSE)
centroids.Metapop1 <- centroids.long(sites1, grouping=Metapop, centroids.only=TRUE)
Pop.ellipses.long2 <- ordiellipse.long(Pop.ellipses, grouping.name="Pop.names")
plotgg.e <- ggplot() +
geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +
xlab(axis1[1, "label"]) +
ylab(axis1[2, "label"]) +
scale_x_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
scale_y_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
geom_polygon(data=Pop.ellipses.long2,
aes(x=axis1, y=axis2, colour=Pop.names,
fill=after_scale(alpha(colour, 0.2))),
size=0.2, show.legend=TRUE) +
geom_segment(data=centroids.Pop2,
aes(x=axis1c, y=axis2c, xend=axis1, yend=axis2, colour=Pop.names),
size=0.7, show.legend=FALSE) +
geom_point(data=centroids.Metapop1,
aes(x=axis1c, y=axis2c, shape=Metapop),
colour="red", alpha=0.7, size=6) +
geom_point(data=centroids.Pop1,
aes(x=axis1c, y=axis2c),
shape="square", colour="black", alpha=0.7, size=2) +
geom_label_repel(data=centroids.Pop1,
aes(x=axis1c, y=axis2c, label=Pop.names, colour=Pop.names),
alpha=1.0, size=2, show.legend=FALSE) +
labs(colour = "Population", shape="Metapopulation") +
BioR.theme +
theme(legend.text = element_text(size = 10)) +
guides(shape = guide_legend(override.aes = list(fill = "white"))) +
scale_colour_npg() +
coord_fixed(ratio=1)
plotgg.e
sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 18363)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United Kingdom.1252
## [2] LC_CTYPE=English_United Kingdom.1252
## [3] LC_MONETARY=English_United Kingdom.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United Kingdom.1252
##
## attached base packages:
## [1] tcltk stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] ggrepel_0.8.2 dplyr_1.0.2 ggforce_0.3.2
## [4] ggsci_2.9 ggplot2_3.3.2 poppr_2.8.6
## [7] adegenet_2.1.3 ade4_1.7-16 BiodiversityR_2.12-3
## [10] vegan_2.5-6 lattice_0.20-41 permute_0.9-5
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.1.7 Hmisc_4.4-0
## [4] fastmatch_1.1-0 plyr_1.8.6 igraph_1.2.6
## [7] sp_1.4-2 splines_4.0.2 digest_0.6.25
## [10] htmltools_0.5.0 gdata_2.18.0 relimp_1.0-5
## [13] magrittr_1.5 checkmate_2.0.0 cluster_2.1.0
## [16] openxlsx_4.1.5 tcltk2_1.2-11 gmodels_2.18.1
## [19] sandwich_2.5-1 prettyunits_1.1.1 jpeg_0.1-8.1
## [22] colorspace_1.4-1 mitools_2.4 haven_2.3.1
## [25] xfun_0.15 crayon_1.3.4 lme4_1.1-23
## [28] survival_3.1-12 zoo_1.8-8 phangorn_2.5.5
## [31] ape_5.4 glue_1.4.1 polyclip_1.10-0
## [34] gtable_0.3.0 seqinr_4.2-4 polysat_1.7-4
## [37] car_3.0-8 abind_1.4-5 scales_1.1.1
## [40] DBI_1.1.0 Rcpp_1.0.4.6 isoband_0.2.1
## [43] viridisLite_0.3.0 xtable_1.8-4 progress_1.2.2
## [46] spData_0.3.8 htmlTable_2.0.0 units_0.6-7
## [49] foreign_0.8-80 spdep_1.1-5 Formula_1.2-3
## [52] survey_4.0 htmlwidgets_1.5.1 RColorBrewer_1.1-2
## [55] acepack_1.4.1 ellipsis_0.3.1 pkgconfig_2.0.3
## [58] farver_2.0.3 nnet_7.3-14 deldir_0.2-3
## [61] labeling_0.3 tidyselect_1.1.0 rlang_0.4.8
## [64] reshape2_1.4.4 later_1.1.0.1 munsell_0.5.0
## [67] cellranger_1.1.0 tools_4.0.2 generics_0.1.0
## [70] evaluate_0.14 stringr_1.4.0 fastmap_1.0.1
## [73] yaml_2.2.1 knitr_1.28 zip_2.0.4
## [76] purrr_0.3.4 nlme_3.1-148 mime_0.9
## [79] compiler_4.0.2 rstudioapi_0.11 curl_4.3
## [82] png_0.1-7 e1071_1.7-3 tibble_3.0.1
## [85] statmod_1.4.34 tweenr_1.0.1 stringi_1.4.6
## [88] forcats_0.5.0 Matrix_1.2-18 classInt_0.4-3
## [91] nloptr_1.2.2.1 vctrs_0.3.4 effects_4.1-4
## [94] RcmdrMisc_2.7-0 pillar_1.4.4 LearnBayes_2.15.1
## [97] lifecycle_0.2.0 data.table_1.12.8 raster_3.4-5
## [100] httpuv_1.5.4 R6_2.4.1 latticeExtra_0.6-29
## [103] promises_1.1.1 KernSmooth_2.23-17 gridExtra_2.3
## [106] rio_0.5.16 codetools_0.2-16 boot_1.3-25
## [109] MASS_7.3-51.6 gtools_3.8.2 withr_2.2.0
## [112] nortest_1.0-4 Rcmdr_2.6-2 pegas_0.14
## [115] mgcv_1.8-31 expm_0.999-5 parallel_4.0.2
## [118] hms_0.5.3 quadprog_1.5-8 grid_4.0.2
## [121] rpart_4.1-15 coda_0.19-3 class_7.3-17
## [124] minqa_1.2.4 rmarkdown_2.3 carData_3.0-4
## [127] sf_0.9-6 shiny_1.4.0.2 base64enc_0.1-3