library(BiodiversityR) # also loads vegan
library(poppr)
library(ggplot2)
library(ggsci)
library(ggforce)
library(dplyr)
library(ggrepel)
Over a decade ago, while working on a manual for statistical analysis of dominant markers (this manual was partially inspired by a previous manual on the statistical analysis of biodiversity and community ecology data), I had explored the similarities between (i) Analysis of Molecular Variance (AMOVA) that was originally developed by Excoffier et al. 1992 ( See here for a recent expansion of the framework to autopolyploids ) and (ii) Ordination methods and Multivariate Analysis of Variance Using Distance Matrices in the field of community ecology (I provide references for these in subsequent sections). Whereas the manual is now more than a decade old (although many methods are still relevant; those interested could compare with this recent Review of Best Practices Population Genetic Analysis), in my opinion it remains valid to still highlight the connections between AMOVA and several methods of community ecology that are directly available via the vegan and BiodiversityR packages.
A second objective was to show a pathway of how data from poppr can be easily transformed into the community and environmental data sets used by vegan and BiodiverrsityR.
The third objective was to show a method of identifying outliers (possible recent migrants) by a simple graphical approach that I had started thinking about when working on the statistical manual, but never ended up publishing.
And a fourth objective was to show some examples of generating ggplot2 graphs, similar to other guidelines for generating ggplot2 graphs with vegan and BiodiversityR.
The poppr.amova example with the root rot pathogen Aphanomyces euteiches AFLP data is rerun here (more details are available from this primer ; see the original study here).
data(Aeut)
strata(Aeut) <- other(Aeut)$population_hierarchy[-1]
agc <- as.genclone(Aeut)
agc
##
## This is a genclone object
## -------------------------
## Genotype information:
##
## 119 original multilocus genotypes
## 187 diploid individuals
## 56 dominant loci
##
## Population information:
##
## 2 strata - Pop, Subpop
## 2 populations defined - Athena, Mt. Vernon
str(agc)
## Formal class 'genclone' [package "poppr"] with 12 slots
## ..@ mlg :Formal class 'MLG' [package "poppr"] with 7 slots
## .. .. ..@ visible : chr "original"
## .. .. ..@ cutoff : Named num [1:2] 0 0
## .. .. .. ..- attr(*, "names")= chr [1:2] "expanded" "contracted"
## .. .. ..@ distname: chr "diss.dist"
## .. .. ..@ distenv :<environment: R_GlobalEnv>
## .. .. ..@ distargs: list()
## .. .. ..@ distalgo: chr "farthest_neighbor"
## .. .. ..@ mlg :'data.frame': 187 obs. of 4 variables:
## .. .. .. ..$ expanded : int [1:187] 63 53 17 16 26 42 8 17 16 93 ...
## .. .. .. ..$ original : int [1:187] 63 53 17 16 26 42 8 17 16 93 ...
## .. .. .. ..$ contracted: int [1:187] 63 53 17 16 26 42 8 17 16 93 ...
## .. .. .. ..$ custom : Factor w/ 119 levels "1","2","3","4",..: 63 53 17 16 26 42 8 17 16 93 ...
## ..@ tab : int [1:187, 1:56] 1 1 1 1 1 1 1 1 1 1 ...
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : chr [1:187] "001" "002" "003" "004" ...
## .. .. ..$ : chr [1:56] "L01" "L02" "L03" "L04" ...
## ..@ loc.fac : NULL
## ..@ loc.n.all: NULL
## ..@ all.names: NULL
## ..@ ploidy : int [1:187] 2 2 2 2 2 2 2 2 2 2 ...
## ..@ type : chr "PA"
## ..@ other :List of 1
## .. ..$ population_hierarchy:'data.frame': 187 obs. of 3 variables:
## .. .. ..$ Pop_Subpop: chr [1:187] "Athena_1" "Athena_1" "Athena_1" "Athena_1" ...
## .. .. ..$ Pop : chr [1:187] "Athena" "Athena" "Athena" "Athena" ...
## .. .. ..$ Subpop : chr [1:187] "1" "1" "1" "1" ...
## ..@ call : language as.genclone(x = Aeut)
## ..@ pop : Factor w/ 2 levels "Athena","Mt. Vernon": 1 1 1 1 1 1 1 1 1 1 ...
## ..@ strata :'data.frame': 187 obs. of 2 variables:
## .. ..$ Pop : Factor w/ 2 levels "Athena","Mt. Vernon": 1 1 1 1 1 1 1 1 1 1 ...
## .. ..$ Subpop: Factor w/ 10 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 2 ...
## ..@ hierarchy: NULL
head(agc$tab)
## L01 L02 L03 L04 L05 L06 L07 L08 L09 L10 L11 L12 L13 L14 L15 L16 L17 L18 L19
## 001 1 0 1 0 1 0 0 1 1 0 1 1 1 0 0 1 0 0 0
## 002 1 0 1 0 0 0 0 1 1 1 1 1 1 0 0 1 0 0 0
## 003 1 0 1 0 0 0 0 0 1 0 1 1 1 0 0 1 0 0 0
## 004 1 0 1 0 0 0 0 0 1 0 1 1 1 0 0 1 0 0 0
## 005 1 0 1 0 0 0 0 0 1 0 1 1 1 0 0 1 0 0 0
## 006 1 0 1 0 0 0 0 0 1 1 1 1 1 0 0 1 0 0 0
## L20 L21 L22 L23 L24 L25 L26 L27 L28 L29 L30 L31 L32 L33 L34 L35 L36 L37 L38
## 001 1 0 0 1 1 0 0 1 0 1 0 1 0 0 1 0 1 0 0
## 002 1 0 0 1 1 0 0 1 0 1 0 1 0 0 1 0 1 0 0
## 003 1 0 0 1 1 0 0 0 0 1 0 1 0 0 1 0 0 0 0
## 004 1 0 0 1 1 0 0 0 0 1 0 1 0 0 1 0 0 0 0
## 005 1 0 0 1 1 0 0 1 0 1 0 1 0 0 1 0 1 1 0
## 006 1 0 0 1 1 0 0 1 0 1 0 1 0 0 1 0 1 0 0
## L39 L40 L41 L42 L43 L44 L45 L46 L47 L48 L49 L50 L51 L52 L53 L54 L55 L56
## 001 0 0 0 1 1 1 1 0 0 0 1 1 1 1 0 0 0 1
## 002 0 0 0 1 1 1 1 0 0 0 1 1 1 1 1 0 0 1
## 003 0 0 0 1 0 1 0 0 0 0 1 0 1 1 0 0 0 0
## 004 0 0 0 1 0 1 0 0 0 0 1 0 0 1 0 0 0 0
## 005 0 0 0 1 1 1 1 0 0 0 1 1 1 1 0 0 1 0
## 006 0 0 0 1 1 1 1 0 0 0 1 1 1 1 0 0 1 1
str(agc$tab)
## int [1:187, 1:56] 1 1 1 1 1 1 1 1 1 1 ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:187] "001" "002" "003" "004" ...
## ..$ : chr [1:56] "L01" "L02" "L03" "L04" ...
str(agc$other$population_hierarchy)
## 'data.frame': 187 obs. of 3 variables:
## $ Pop_Subpop: chr "Athena_1" "Athena_1" "Athena_1" "Athena_1" ...
## $ Pop : chr "Athena" "Athena" "Athena" "Athena" ...
## $ Subpop : chr "1" "1" "1" "1" ...
summary(agc$other$population_hierarchy)
## Pop_Subpop Pop Subpop
## Length:187 Length:187 Length:187
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
summary(factor(agc$other$population_hierarch$Pop_Subpop))
## Athena_1 Athena_10 Athena_2 Athena_3 Athena_4 Athena_5
## 9 9 12 10 13 10
## Athena_6 Athena_7 Athena_8 Athena_9 Mt. Vernon_1 Mt. Vernon_2
## 5 11 8 10 10 6
## Mt. Vernon_3 Mt. Vernon_4 Mt. Vernon_5 Mt. Vernon_6 Mt. Vernon_7 Mt. Vernon_8
## 8 12 17 12 12 13
amova.result <- poppr.amova(agc, ~Pop/Subpop, method="ade4")
##
## No missing values detected.
amova.result
## $call
## ade4::amova(samples = xtab, distances = xdist, structures = xstruct)
##
## $results
## Df Sum Sq Mean Sq
## Between Pop 1 1051.2345 1051.234516
## Between samples Within Pop 16 273.4575 17.091091
## Within samples 169 576.5059 3.411277
## Total 186 1901.1979 10.221494
##
## $componentsofcovariance
## Sigma %
## Variations Between Pop 11.063446 70.006786
## Variations Between samples Within Pop 1.328667 8.407483
## Variations Within samples 3.411277 21.585732
## Total variations 15.803391 100.000000
##
## $statphi
## Phi
## Phi-samples-total 0.7841427
## Phi-samples-Pop 0.2803128
## Phi-Pop-total 0.7000679
amova.test <- randtest(amova.result, nrepet=9999) # Test for significance
plot(amova.test)
amova.test
## class: krandtest lightkrandtest
## Monte-Carlo tests
## Call: randtest.amova(xtest = amova.result, nrepet = 9999)
##
## Number of tests: 3
##
## Adjustment method for multiple comparisons: none
## Permutation number: 9999
## Test Obs Std.Obs Alter Pvalue
## 1 Variations within samples 3.411277 -31.71730 less 1e-04
## 2 Variations between samples 1.328667 20.31031 greater 1e-04
## 3 Variations between Pop 11.063446 10.02392 greater 1e-04
These results are kept in data.frame for later comparisons
amova.data <- amova.result$results
Packages vegan and BiodiversityR typically use two data sets as input in community ecology statistical analysis : (i) a community data set with sites as rows, species as columns and abundance values as cell values; and (ii) an environmental data set with sites as rows and descriptors as columns. A more complete introduction is available from the Tree Diversity Analysis manual on community ecology and biodiversity analysis. It is expected that rows in both data sets reflect the same sites, something that can be checked via BiodiversityR::check.datasets.
agc.comm <- data.frame(as.matrix(agc$tab))
str(agc.comm)
## 'data.frame': 187 obs. of 56 variables:
## $ L01: int 1 1 1 1 1 1 1 1 1 1 ...
## $ L02: int 0 0 0 0 0 0 0 0 0 1 ...
## $ L03: int 1 1 1 1 1 1 1 1 1 0 ...
## $ L04: int 0 0 0 0 0 0 0 0 0 0 ...
## $ L05: int 1 0 0 0 0 0 0 0 0 0 ...
## $ L06: int 0 0 0 0 0 0 0 0 0 0 ...
## $ L07: int 0 0 0 0 0 0 0 0 0 0 ...
## $ L08: int 1 1 0 0 0 0 0 0 0 0 ...
## $ L09: int 1 1 1 1 1 1 1 1 1 1 ...
## $ L10: int 0 1 0 0 0 1 0 0 0 0 ...
## $ L11: int 1 1 1 1 1 1 1 1 1 1 ...
## $ L12: int 1 1 1 1 1 1 1 1 1 0 ...
## $ L13: int 1 1 1 1 1 1 0 1 1 1 ...
## $ L14: int 0 0 0 0 0 0 0 0 0 0 ...
## $ L15: int 0 0 0 0 0 0 0 0 0 0 ...
## $ L16: int 1 1 1 1 1 1 1 1 1 1 ...
## $ L17: int 0 0 0 0 0 0 0 0 0 0 ...
## $ L18: int 0 0 0 0 0 0 0 0 0 0 ...
## $ L19: int 0 0 0 0 0 0 0 0 0 1 ...
## $ L20: int 1 1 1 1 1 1 1 1 1 1 ...
## $ L21: int 0 0 0 0 0 0 0 0 0 0 ...
## $ L22: int 0 0 0 0 0 0 0 0 0 1 ...
## $ L23: int 1 1 1 1 1 1 1 1 1 1 ...
## $ L24: int 1 1 1 1 1 1 1 1 1 1 ...
## $ L25: int 0 0 0 0 0 0 0 0 0 0 ...
## $ L26: int 0 0 0 0 0 0 0 0 0 0 ...
## $ L27: int 1 1 0 0 1 1 0 0 0 1 ...
## $ L28: int 0 0 0 0 0 0 0 0 0 0 ...
## $ L29: int 1 1 1 1 1 1 1 1 1 1 ...
## $ L30: int 0 0 0 0 0 0 0 0 0 0 ...
## $ L31: int 1 1 1 1 1 1 1 1 1 1 ...
## $ L32: int 0 0 0 0 0 0 0 0 0 0 ...
## $ L33: int 0 0 0 0 0 0 0 0 0 1 ...
## $ L34: int 1 1 1 1 1 1 1 1 1 1 ...
## $ L35: int 0 0 0 0 0 0 0 0 0 0 ...
## $ L36: int 1 1 0 0 1 1 0 0 0 1 ...
## $ L37: int 0 0 0 0 1 0 0 0 0 0 ...
## $ L38: int 0 0 0 0 0 0 0 0 0 0 ...
## $ L39: int 0 0 0 0 0 0 0 0 0 0 ...
## $ L40: int 0 0 0 0 0 0 0 0 0 0 ...
## $ L41: int 0 0 0 0 0 0 0 0 0 0 ...
## $ L42: int 1 1 1 1 1 1 1 1 1 1 ...
## $ L43: int 1 1 0 0 1 1 0 0 0 0 ...
## $ L44: int 1 1 1 1 1 1 1 1 1 1 ...
## $ L45: int 1 1 0 0 1 1 0 0 0 0 ...
## $ L46: int 0 0 0 0 0 0 0 0 0 0 ...
## $ L47: int 0 0 0 0 0 0 0 0 0 0 ...
## $ L48: int 0 0 0 0 0 0 0 0 0 0 ...
## $ L49: int 1 1 1 1 1 1 1 1 1 1 ...
## $ L50: int 1 1 0 0 1 1 0 0 0 1 ...
## $ L51: int 1 1 1 0 1 1 0 1 0 1 ...
## $ L52: int 1 1 1 1 1 1 0 1 1 1 ...
## $ L53: int 0 1 0 0 0 0 0 0 0 1 ...
## $ L54: int 0 0 0 0 0 0 0 0 0 0 ...
## $ L55: int 0 0 0 0 1 1 0 0 0 1 ...
## $ L56: int 1 1 0 0 0 1 0 0 0 0 ...
agc.env <- data.frame(as.matrix(agc$other$population_hierarchy))
agc.env[, 1] <- as.factor(agc.env[, 1])
agc.env[, 2] <- as.factor(agc.env[, 2])
agc.env[, 3] <- as.factor(agc.env[, 3])
str(agc.env)
## 'data.frame': 187 obs. of 3 variables:
## $ Pop_Subpop: Factor w/ 18 levels "Athena_1","Athena_10",..: 1 1 1 1 1 1 1 1 1 3 ...
## $ Pop : Factor w/ 2 levels "Athena","Mt. Vernon": 1 1 1 1 1 1 1 1 1 1 ...
## $ Subpop : Factor w/ 10 levels "1","10","2","3",..: 1 1 1 1 1 1 1 1 1 3 ...
BiodiversityR::check.datasets(agc.comm, agc.env)
## Warning: rownames for community and environmental datasets are different
Checking via BiodiversityR::check.datasets gives a warning that the row names are different. As both community and environmental came from the same genclone object, the problem with differences in rownames can be ignored for the analysis. But the rownames can be changed to a name that reflects the population structure.
names.new <- agc.env$Pop_Subpop
names.new <- gsub(pattern="Athena_", replacement="A", x=names.new)
names.new <- gsub(pattern="Mt. Vernon_", replacement="M", x=names.new)
table(names.new)
## names.new
## A1 A10 A2 A3 A4 A5 A6 A7 A8 A9 M1 M2 M3 M4 M5 M6 M7 M8
## 9 9 12 10 13 10 5 11 8 10 10 6 8 12 17 12 12 13
names.new2 <- names.new
names.new2[1] <- paste0(names.new[1], ".", 1)
counter <- 1
for (i in 2:length(names.new)) {
if (names.new[i] == names.new[i-1]) {
counter <- counter + 1
}else{
counter <- 1
}
names.new2[i] <- paste0(names.new[i], ".", counter)
}
cbind(names.new, names.new2)[1:25,]
## names.new names.new2
## [1,] "A1" "A1.1"
## [2,] "A1" "A1.2"
## [3,] "A1" "A1.3"
## [4,] "A1" "A1.4"
## [5,] "A1" "A1.5"
## [6,] "A1" "A1.6"
## [7,] "A1" "A1.7"
## [8,] "A1" "A1.8"
## [9,] "A1" "A1.9"
## [10,] "A2" "A2.1"
## [11,] "A2" "A2.2"
## [12,] "A2" "A2.3"
## [13,] "A2" "A2.4"
## [14,] "A2" "A2.5"
## [15,] "A2" "A2.6"
## [16,] "A2" "A2.7"
## [17,] "A2" "A2.8"
## [18,] "A2" "A2.9"
## [19,] "A2" "A2.10"
## [20,] "A2" "A2.11"
## [21,] "A2" "A2.12"
## [22,] "A3" "A3.1"
## [23,] "A3" "A3.2"
## [24,] "A3" "A3.3"
## [25,] "A3" "A3.4"
rownames(agc.comm) <- names.new2
rownames(agc.env) <- names.new2
check.datasets(agc.comm, agc.env)
## OK
As input data for the analysis, a distance matrix is generated with Euclidean distance among individuals.
agc.dist <- vegdist(agc.comm, method="euclidean")
as.matrix(agc.dist)[1:5, 1:5]
## A1.1 A1.2 A1.3 A1.4 A1.5
## A1.1 0.000000 1.732051 2.828427 3.000000 2.236068
## A1.2 1.732051 0.000000 3.000000 3.162278 2.449490
## A1.3 2.828427 3.000000 0.000000 1.000000 2.645751
## A1.4 3.000000 3.162278 1.000000 0.000000 2.828427
## A1.5 2.236068 2.449490 2.645751 2.828427 0.000000
The molecular analysis of variance can now be conducted as a Permutational Multivariate Analysis of Variance Using Distance Matrices. For more details, especially a reference to the original article by Marti J. Anderson on non‐parametric multivariate analysis of variance, check the documentation of the vegan::adonis function.
agc.adonis <- adonis2(agc.dist ~ Pop + Pop_Subpop, data=agc.env, permutations=9999)
agc.adonis
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = agc.dist ~ Pop + Pop_Subpop, data = agc.env, permutations = 9999)
## Df SumOfSqs R2 F Pr(>F)
## Pop 1 1051.23 0.55293 308.1645 1e-04 ***
## Pop_Subpop 16 273.46 0.14383 5.0102 1e-04 ***
## Residual 169 576.51 0.30323
## Total 186 1901.20 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
These results show the same SumOfSqs as with the AMOVA.
adonis.data <- data.frame(agc.adonis)
adonis.data[which(rownames(adonis.data) == "Pop"), "SumOfSqs"] -
amova.data[which(rownames(amova.data)=="Between Pop "), "Sum Sq"]
## [1] 2.273737e-13
adonis.data[which(rownames(adonis.data) == "Pop_Subpop"), "SumOfSqs"] -
amova.data[which(rownames(amova.data)=="Between samples Within Pop"), "Sum Sq"]
## [1] 9.094947e-13
It is possible to now also calculate statistics such as PHIst from the adonis result. However, as these already are available from the AMOVA result, what is more relevant are the permutation tests shown in the next section. Directly below I show an approximation method with average sizes of populations and subpopulations. The actual calculation require the adjustment of sample sizes for unbalanced datasets (See for example equations 4a - 4c here - and these are still available also from the decade-old manual on the analysis of dominant markers; See page 80).
adonis.data$MS <- adonis.data$SumOfSqs / adonis.data$Df
amova.covar.data <- data.frame(amova.result$componentsofcovariance)
amova.phi.data <- data.frame(amova.result$statphi)
subpop.sizes <- table(agc.env$Pop_Subpop)
subpop.sizes
##
## Athena_1 Athena_10 Athena_2 Athena_3 Athena_4 Athena_5
## 9 9 12 10 13 10
## Athena_6 Athena_7 Athena_8 Athena_9 Mt. Vernon_1 Mt. Vernon_2
## 5 11 8 10 10 6
## Mt. Vernon_3 Mt. Vernon_4 Mt. Vernon_5 Mt. Vernon_6 Mt. Vernon_7 Mt. Vernon_8
## 8 12 17 12 12 13
n1 <- mean(subpop.sizes)
n1
## [1] 10.38889
pop.sizes <- table(agc.env$Pop)
pop.sizes
##
## Athena Mt. Vernon
## 97 90
n2 <- mean(pop.sizes)
n2
## [1] 93.5
Sigma.withinSamples <- adonis.data[3, "MS"]
Sigma.withinSamples
## [1] 3.411277
amova.covar.data[rownames(amova.covar.data) == "Variations Within samples", "Sigma"]
## [1] 3.411277
Sigma.betweenSamples <- (adonis.data[2, "MS"] -
Sigma.withinSamples) / n1
Sigma.betweenSamples
## [1] 1.316773
amova.covar.data[rownames(amova.covar.data) == "Variations Between samples Within Pop", "Sigma"]
## [1] 1.328667
Sigma.betweenPop <- (adonis.data[1, "MS"] -
Sigma.withinSamples - n1*Sigma.betweenSamples) / n2
Sigma.betweenPop
## [1] 11.06036
amova.covar.data[rownames(amova.covar.data) == "Variations Between Pop ", "Sigma"]
## [1] 11.06345
Sigma.total <- Sigma.withinSamples + Sigma.betweenSamples + Sigma.betweenPop
PHI.ST <- (Sigma.betweenPop + Sigma.betweenSamples) / Sigma.total
PHI.ST
## [1] 0.7839378
amova.phi.data[rownames(amova.phi.data) == "Phi-samples-total", ]
## [1] 0.7841427
PHI.SP <- Sigma.betweenSamples / (Sigma.betweenSamples + Sigma.withinSamples)
PHI.SP
## [1] 0.2785024
amova.phi.data[rownames(amova.phi.data) == "Phi-samples-Pop", ]
## [1] 0.2803128
PHI.PT <- Sigma.betweenPop / Sigma.total
PHI.PT
## [1] 0.7005366
amova.phi.data[rownames(amova.phi.data) == "Phi-Pop-total", ]
## [1] 0.7000679
In the results above, permutations of individuals were not restricted. Proper significance testing requires that permutations are informed by this nested structure. For example, to test the significance of populations, entire subpopulations should be randomly shuffled across populations. Check the original article of A new method for non‐parametric multivariate analysis of variance for details about appropriate permutation schemes for nested designs.
The permuation methodology appropriate for nested designs is implemented in BiodiversityR::nested.npmanova. The number of permutations does not need to be set very high to investigate whether there are sufficient levels (here: the number of subpopulations) to test the significance of the higher level (here: population structure).
agc.nested <- nested.npmanova(agc.dist ~ Pop + Pop_Subpop, data = agc.env, permutations = 999)
## Total sum of squares of distance matrix: 1901.198
agc.nested
## Total sum of squares for non-parametric manova: 1901.19786096257
##
## Nested anova for Pop_Subpop nested within Pop
##
## Df SumsofSquares F N.Perm Pr(>F)
## Pop 1 1051.23 61.5077 999 0.001 ***
## Pop_Subpop 16 273.46 5.0102 999 0.001 ***
## Residuals 169 576.51 3.4113
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The results above show that there is statistical evidence for differentiation at the population level (based on shuffling of entire subpopulations over populations) beyond the subpopulation level.
In the following data, however, where there are only 5 subpopulations, there is no evidence for differentiation at that level. This data set contains information from the medicinal tree species Warburgia ugandensis with ‘populations’ (in the terminology above, these are at the subpopulation level) located on different sides of branches of the East African Rift System.
data(warcom)
str(warcom)
## 'data.frame': 100 obs. of 185 variables:
## $ locus001: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus002: int 0 1 0 0 0 0 1 0 0 0 ...
## $ locus003: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus004: int 0 1 1 1 1 1 1 1 0 1 ...
## $ locus005: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus006: int 0 1 0 0 0 0 0 0 0 0 ...
## $ locus007: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus008: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus009: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus010: int 0 1 0 1 0 0 1 0 0 1 ...
## $ locus011: int 0 0 1 0 0 0 0 0 0 0 ...
## $ locus012: int 1 0 1 1 1 1 1 1 0 0 ...
## $ locus013: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus014: int 0 1 1 0 0 0 1 0 0 0 ...
## $ locus015: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus016: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus017: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus018: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus019: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus020: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus021: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus022: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus023: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus024: int 1 1 1 1 1 1 1 1 1 1 ...
## $ locus025: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus026: int 1 1 1 1 1 1 1 1 1 1 ...
## $ locus027: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus028: int 1 1 1 1 1 1 1 1 1 1 ...
## $ locus029: int 0 1 1 0 0 0 1 0 1 0 ...
## $ locus030: int 0 1 1 1 0 0 1 1 1 1 ...
## $ locus031: int 0 1 1 0 0 1 1 0 1 1 ...
## $ locus032: int 0 0 0 0 0 0 1 0 0 0 ...
## $ locus033: int 1 1 1 1 1 1 1 1 1 1 ...
## $ locus034: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus035: int 0 1 0 0 0 0 1 0 0 0 ...
## $ locus036: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus037: int 0 0 1 0 0 0 0 0 0 0 ...
## $ locus038: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus039: int 0 0 1 1 1 0 1 0 0 0 ...
## $ locus040: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus041: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus042: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus043: int 1 1 0 0 0 1 0 1 0 1 ...
## $ locus044: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus045: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus046: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus047: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus048: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus049: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus050: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus051: int 1 1 1 1 1 1 1 1 1 1 ...
## $ locus052: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus053: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus054: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus055: int 1 1 1 1 1 1 1 1 1 1 ...
## $ locus056: int 1 1 1 1 1 1 1 1 1 1 ...
## $ locus057: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus058: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus059: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus060: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus061: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus062: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus063: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus064: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus065: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus066: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus067: int 1 1 1 1 1 1 1 1 1 1 ...
## $ locus068: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus069: int 0 0 0 1 0 0 0 0 0 0 ...
## $ locus070: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus071: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus072: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus073: int 1 0 1 1 0 1 0 1 1 0 ...
## $ locus074: int 1 1 1 0 1 1 1 1 1 1 ...
## $ locus075: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus076: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus077: int 0 1 0 1 1 1 1 0 1 1 ...
## $ locus078: int 1 1 1 1 1 1 1 1 1 1 ...
## $ locus079: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus080: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus081: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus082: int 1 1 1 1 1 1 1 1 1 1 ...
## $ locus083: int 1 1 1 0 1 1 1 1 1 1 ...
## $ locus084: int 0 0 0 1 0 0 0 0 0 0 ...
## $ locus085: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus086: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus087: int 1 1 1 1 1 1 1 1 1 0 ...
## $ locus088: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus089: int 1 1 1 1 1 1 1 1 1 0 ...
## $ locus090: int 1 1 1 1 1 1 1 1 1 1 ...
## $ locus091: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus092: int 0 0 0 0 0 0 0 0 0 0 ...
## $ locus093: int 1 1 1 1 1 1 1 1 1 0 ...
## $ locus094: int 1 1 0 0 0 0 0 0 1 0 ...
## $ locus095: int 1 1 1 0 1 1 1 1 1 1 ...
## $ locus096: int 0 0 0 1 0 0 0 0 0 0 ...
## $ locus097: int 1 1 0 1 0 1 0 1 1 1 ...
## $ locus098: int 1 0 0 0 1 0 0 0 0 0 ...
## $ locus099: int 1 1 1 0 0 1 0 1 0 1 ...
## [list output truncated]
data(warenv)
summary(warenv)
## population popshort country rift.valley
## Kibale :20 KKIT:20 Kenya :60 east:60
## Kitale :20 KLAI:20 Tanzania:20 west:40
## Laikipia:20 KMAR:20 Uganda :20
## Lushoto :20 TLUS:20
## Mara :20 UKIB:20
The nested analysis now shows that five populations are not sufficient to obtain significant differences for investigating differences between the two sides of the Rift Valley. The permutation test clearly reflects that there are only 10 possibly combinations of randomly allocating 2 populations to the western branch of the Rift Valley.
set.seed(1)
nested.npmanova(warcom ~ rift.valley + population, data=warenv, method="euc", permutations=999)
## Total sum of squares of distance matrix: 1913.98
## Total sum of squares for non-parametric manova: 1913.98
##
## Nested anova for population nested within rift.valley
##
## Df SumsofSquares F N.Perm Pr(>F)
## rift.valley 1 442.40 6.9895 999 0.106
## population 3 189.88 4.6914 999 0.001 ***
## Residuals 95 1281.70 13.4916
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Note further that although an option exists of using strata in adonis, that this analysis estimates significant differentiation at the population level of the Rift Valley.
war.adonis <- adonis2(warcom ~ country + population, data=warenv, method="euclidean",
strata="population", permutations=9999)
war.adonis
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = warcom ~ country + population, data = warenv, permutations = 9999, method = "euclidean", strata = "population")
## Df SumOfSqs R2 F Pr(>F)
## country 2 278.08 0.14529 10.306 1e-04 ***
## population 2 354.20 0.18506 13.127 1e-04 ***
## Residual 95 1281.70 0.66965
## Total 99 1913.98 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
As shown in the above, AMOVA and adonis provide the same information on sums-of-squares. Another alternative to get information on sums-of-squares is via redundancy analysis.
agc.rda <- rda(agc.comm ~ Pop + Pop_Subpop, data = agc.env)
agc.rda
## Call: rda(formula = agc.comm ~ Pop + Pop_Subpop, data = agc.env)
##
## Inertia Proportion Rank
## Total 10.2215 1.0000
## Constrained 7.1220 0.6968 17
## Unconstrained 3.0995 0.3032 54
## Inertia is variance
## Some constraints were aliased because they were collinear (redundant)
##
## Eigenvalues for constrained axes:
## RDA1 RDA2 RDA3 RDA4 RDA5 RDA6 RDA7 RDA8 RDA9 RDA10 RDA11 RDA12 RDA13
## 5.901 0.521 0.189 0.140 0.085 0.054 0.043 0.038 0.030 0.028 0.024 0.020 0.018
## RDA14 RDA15 RDA16 RDA17
## 0.010 0.008 0.007 0.005
##
## Eigenvalues for unconstrained axes:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8
## 0.7314 0.4479 0.2299 0.1959 0.1628 0.1377 0.1068 0.1043
## (Showing 8 of 54 unconstrained eigenvalues)
# summary(agc.rda) # comprehensive list of data that can subsequently be plotted in an ordination graph
The inertia can be used to calculate the sums of squares by multiplying it by the (number of individuals - 1).
SumsofSqsTotal.RDA <- agc.rda$tot.chi * (nrow(agc.comm) - 1)
SumsofSqsTotal.RDA
## [1] 1901.198
amova.data[rownames(amova.data) == "Total", "Sum.Sq"] - SumsofSqsTotal.RDA
## numeric(0)
rda.anova.data <- permutest(agc.rda, by="terms", permutations=9999)
rda.anova.data
##
## Permutation test for rda under reduced model
##
## Permutation: free
## Number of permutations: 9999
##
## Model: rda(formula = agc.comm ~ Pop + Pop_Subpop, data = agc.env)
## Permutation test for sequential contrasts
## Df Inertia F Pr(>F)
## Pop 1 5.6518 308.1645 1e-04 ***
## Pop_Subpop 16 1.4702 5.0102 1e-04 ***
## Residual 169 3.0995
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Again the inertia can be used to calculate sums-of-squares.
SumsofSqsPop.RDA <- rda.anova.data$chi[1] * (nrow(agc.comm) - 1)
SumsofSqsPop.RDA
##
## 1051.235
amova.data[rownames(amova.data) == "Between Pop ", "Sum.Sq"] - SumsofSqsPop.RDA
## numeric(0)
SumsofSqsBetweenSamples.RDA <- rda.anova.data$chi[2] * (nrow(agc.comm) - 1)
SumsofSqsBetweenSamples.RDA
##
## 273.4575
amova.data[rownames(amova.data) == "Between samples Within Pop", "Sum.Sq"] - SumsofSqsBetweenSamples.RDA
## numeric(0)
SumsofSqsWithinSamples.RDA <- rda.anova.data$chi[3] * (nrow(agc.comm) - 1)
SumsofSqsWithinSamples.RDA
## Residual
## 576.5059
amova.data[rownames(amova.data) == "Within samples", "Sum.Sq"] - SumsofSqsWithinSamples.RDA
## numeric(0)
A possibility exists of testing significance levels that are appropriate for nested data via the BiodiversityR::nested.anova.dbrda function, analogous to the BiodiversityR::nested.npmanova function shown above. An example is provided in the BiodiversityR documentation with the Warburgia data sets.
Yet another method of obtaining information on Sums-of-Squares is from the distances between individuals and centroids in ordination graphs. To explain the methodology, first a typical ordination diagram is made whereby centroids are connected to individuals via ‘spider plots’. For more details on the plotting methods, please check this document.
The preparation of the graphs is by first making an ordiplot graph.
plot1 <- ordiplot(agc.rda, choices=c(1, 2))
Then data can be extracted …
attach(agc.env)
sites1 <- sites.long(plot1, env.data=agc.env)
axis1 <- axis.long(agc.rda, choices=c(1, 2))
centroids.Subpop1 <- centroids.long(sites1, grouping=Pop_Subpop, centroids.only=TRUE)
centroids.Subpop2 <- centroids.long(sites1, grouping=Pop_Subpop, centroids.only=FALSE)
centroids.Pop1 <- centroids.long(sites1, grouping=Pop, centroids.only=TRUE)
centroids.Pop2 <- centroids.long(sites1, grouping=Pop, centroids.only=FALSE)
… so that the plot can be made
BioR.theme <- theme(
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid = element_blank(),
axis.line = element_line("gray25"),
text = element_text(size = 12),
axis.text = element_text(size = 10, colour = "gray25"),
axis.title = element_text(size = 14, colour = "gray25"),
legend.title = element_text(size = 14),
legend.text = element_text(size = 14),
legend.key = element_blank())
plotgg1 <- ggplot() +
geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +
xlab(axis1[1, "label"]) +
ylab(axis1[2, "label"]) +
scale_x_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
scale_y_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
geom_mark_hull(data=sites1,
aes(x=axis1, y=axis2, colour=Pop,
fill=after_scale(alpha(colour, 0.2)),
label=Pop),
concavity=3, size=0.2, expand=unit(1, "mm"), show.legend=FALSE) +
geom_segment(data=centroids.Pop2,
aes(x=axis1c, y=axis2c, xend=axis1, yend=axis2, colour=Pop),
size=0.4, show.legend=FALSE) +
geom_point(data=sites1,
aes(x=axis1, y=axis2, colour=Pop, shape=Pop),
alpha=0.7, size=2) +
geom_point(data=centroids.Pop1,
aes(x=axis1c, y=axis2c, shape=Pop),
size=4, colour="darkolivegreen4") +
labs(colour = "Centroid", shape = "Centroid") +
BioR.theme +
ggsci::scale_colour_npg() +
coord_fixed(ratio=1)
What we can see in the graph is a clear separation between the Athena and Mt. Vernon population. Each of the individuals is connected to the centroid of the Population through the ‘spiderplot’.
plotgg1
Although it is generally recommended to keep the coordinate system fixed (as ordination diagram reflect distances among individuals), the plot is expanded here to make differences within populations more obvious. Now also function geom_mark_hull is able to include labels.
plotgg1 + coord_fixed(0.25)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
In the next graph, now individuals are connected to the centroids of their subpopulations.
plotgg2 <- ggplot() +
geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +
xlab(axis1[1, "label"]) +
ylab(axis1[2, "label"]) +
scale_x_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
scale_y_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
geom_segment(data=centroids.Subpop2,
aes(x=axis1c, y=axis2c, xend=axis1, yend=axis2, colour=Pop_Subpop),
size=0.4, show.legend=TRUE) +
geom_point(data=centroids.Subpop2,
aes(x=axis1c, y=axis2c),
colour="black", alpha=0.7, size=2, show.legend=FALSE) +
guides(colour = guide_legend(ncol=2)) +
BioR.theme +
labs(colour = "Subpopulation") +
theme(legend.text = element_text(size = 8)) +
scale_colour_manual(values=colorspace::rainbow_hcl(length(unique(Pop_Subpop)), c=90, l=50)) +
coord_fixed(ratio=0.25)
In the plot, there is much overlap between the subpopulations. The individuals for the Mt. Vernon population that are plotted near individuals of the Athena population belong to two subpopulations of Mt. Vernon.
plotgg2
As there is a lot of overlap between the subpopulations, separate graphs are made for both of the Populations:
sites1A <- sites1[sites1$Pop == "Athena", ]
sites1M <- sites1[sites1$Pop != "Athena", ]
attach(sites1A)
centroids.Subpop1A <- centroids.long(sites1A, grouping=Pop_Subpop, centroids.only=TRUE)
centroids.Subpop2A <- centroids.long(sites1A, grouping=Pop_Subpop, centroids.only=FALSE)
attach(sites1M)
centroids.Subpop1M <- centroids.long(sites1M, grouping=Pop_Subpop, centroids.only=TRUE)
centroids.Subpop2M <- centroids.long(sites1M, grouping=Pop_Subpop, centroids.only=FALSE)
plotgg3 <- ggplot() +
geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +
xlab(axis1[1, "label"]) +
ylab(axis1[2, "label"]) +
scale_x_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
scale_y_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
geom_segment(data=centroids.Subpop2A,
aes(x=axis1c, y=axis2c, xend=axis1, yend=axis2, colour=Pop_Subpop),
size=0.6) +
geom_point(data=centroids.Subpop1A,
aes(x=axis1c, y=axis2c),
size=2, colour="black") +
BioR.theme +
labs(colour="Subpopulations") +
scale_colour_brewer(palette="Paired") +
coord_fixed(ratio=0.1)
plotgg3
plotgg4 <- ggplot() +
geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +
xlab(axis1[1, "label"]) +
ylab(axis1[2, "label"]) +
scale_x_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
scale_y_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
geom_segment(data=centroids.Subpop2M,
aes(x=axis1c, y=axis2c, xend=axis1, yend=axis2, colour=Pop_Subpop),
size=0.6) +
geom_point(data=centroids.Subpop1M,
aes(x=axis1c, y=axis2c),
size=2, colour="black") +
BioR.theme +
labs(colour="Subpopulations") +
scale_colour_brewer(palette="Dark2") +
coord_fixed(ratio=0.4)
plotgg4
Now only plotting a subset of the data
plotgg5 <- plotgg4 + coord_fixed(ratio=0.1, xlim=c(0.4, 0.65))
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
plotgg5
Hopefully there were sufficient graphs above to illustrate connections between individuals and centroids! It now happens that distances between individuals and the centroids also reflects the Sums-of-Squares - for more details see for example Figure 2 in: A new method for non‐parametric multivariate analysis of variance.
It is easier to calculate the Sums-of_Squares via capscale and caprescale. With Euclidean distances, the same ordination configurations are obtained with capscale as with rda.
agc.cap <- capscale(agc.comm ~ Pop + Pop_Subpop, data = agc.env)
agc.cap
## Call: capscale(formula = agc.comm ~ Pop + Pop_Subpop, data = agc.env)
##
## Inertia Proportion Rank
## Total 10.2215 1.0000
## Constrained 7.1220 0.6968 17
## Unconstrained 3.0995 0.3032 54
## Inertia is mean squared Euclidean distance
## Species scores projected from 'agc.comm'
## Some constraints were aliased because they were collinear (redundant)
##
## Eigenvalues for constrained axes:
## CAP1 CAP2 CAP3 CAP4 CAP5 CAP6 CAP7 CAP8 CAP9 CAP10 CAP11 CAP12 CAP13
## 5.901 0.521 0.189 0.140 0.085 0.054 0.043 0.038 0.030 0.028 0.024 0.020 0.018
## CAP14 CAP15 CAP16 CAP17
## 0.010 0.008 0.007 0.005
##
## Eigenvalues for unconstrained axes:
## MDS1 MDS2 MDS3 MDS4 MDS5 MDS6 MDS7 MDS8
## 0.7314 0.4479 0.2299 0.1959 0.1628 0.1377 0.1068 0.1043
## (Showing 8 of 54 unconstrained eigenvalues)
The BiodiversityR::caprescale function has a ‘verbose’ options that gives information on Sums-of-Squares (the function itself rescales positions of sites so that these directly reflect the distances of the matrix used as input).
agc.cap2 <- caprescale(agc.cap, verbose=TRUE)
## SSTot obtained from sum of all eigenvalues: 1901.198
## SSTot obtained from sum of all positive eigenvalues: 1901.198
## SSTot reflected by distances among site scores on all axes: 2294.103
## SSExpl obtained from eigenvalues of constrained axes: 1324.692
## SSExpl reflected by distances among site scores on constrained axes (scaling 1): 1717.597
## SSExpl reflected by distances among fitted site scores on constrained axes (scaling 1): 1324.692
## SSRes obtained from eigenvalues of positive unconstrained axes: 576.5059
## SSRes reflected by distances among site scores on positive unconstrained axes (scaling 1): 576.5059
These results directly indicate 2 of the values of the AMOVA: (i) the Total Sum-of-Squares corresponds to the sum of all positive eigenvalues; and (ii) the Within samples Sum-of-Squares corresponds to the distances among sites on unconstrained axes.
amova.data
## Df Sum Sq Mean Sq
## Between Pop 1 1051.2345 1051.234516
## Between samples Within Pop 16 273.4575 17.091091
## Within samples 169 576.5059 3.411277
## Total 186 1901.1979 10.221494
It can further be verified that the sum of the distances among the fitted site scores and among the sites scores on unconstrained axes equals the total Sum-of-Squares:
1901.198 - 1324.692 - 576.5059
## [1] 1e-04
It is also clear that the fitted site scores reflect the Sum-of-Squares between populations and between samples within populations. What is now important to realize is the fitted site scores are related to the positions of the centroids in the ordination diagram, which will be shown below.
1324.692 - 1051.2345 - 273.4575
## [1] 0
The calculations done by caprescale will now be repeated. First to cross-check is the within samples Sum-of-Squares.
n.axes <- length(agc.cap2$CCA$eig)+length(agc.cap2$CA$eig)
n.constrained.axes <- length(agc.cap2$CCA$eig)
n.unconstrained.axes <- length(agc.cap2$CCA$eig)
Ind.scores <- scores(agc.cap2, choices=c((n.constrained.axes+1):n.axes), display="sites", scaling=1)
Distances.unconstrained <- data.frame(as.matrix(dist(Ind.scores, method="euclidean")))
SumsofSqsWithinSamples.CAP <- sum((Distances.unconstrained)^2)/(2 * nrow(agc.env))
SumsofSqsWithinSamples.CAP
## [1] 576.5059
amova.data[rownames(amova.data) == "Within samples", "Sum.Sq"] - SumsofSqsWithinSamples.CAP
## numeric(0)
The second set to cross-check are the fitted site scores.
Ind.scores <- scores(agc.cap2, choices=c(1:n.constrained.axes), display="lc", scaling=1)
Distances.constrained <- data.frame(as.matrix(dist(Ind.scores, method="euclidean")))
SumsofSqsAmongSamples.CAP <- sum((Distances.constrained)^2)/(2 * nrow(agc.env))
SumsofSqsAmongSamples.CAP
## [1] 1324.692
amova.among <- amova.data[rownames(amova.data) == "Between Pop ", "Sum.Sq"] +
amova.data[rownames(amova.data) == "Between samples Within Pop", "Sum.Sq"]
amova.among - SumsofSqsAmongSamples.CAP
## numeric(0)
What are these fitted sites scores? Let’s make a plot.
plot2 <- ordiplot(agc.cap2, choices=c(1, 2), display=c("sites", "lc"))
Again data can be extracted …
attach(agc.env)
sites1 <- sites.long(plot2, env.data=agc.env)
axis2 <- axis.long(agc.cap2, choices=c(1, 2))
centroids.Subpop1 <- centroids.long(sites1, grouping=Pop_Subpop, centroids.only=TRUE)
centroids.Pop1 <- centroids.long(sites1, grouping=Pop, centroids.only=TRUE)
# to extract the coordinates for the constraints with sites.long, we need to save these as 'sites'
plot2$sites.old <- plot2$sites
plot2$sites <- plot2$constraints
sites1 <- sites.long(plot2, env.data=agc.env)
centroids.Pop_constraints <- centroids.long(sites1, grouping=Pop, centroids.only=TRUE)
… so that the plot can be made
plotgg1 <- ggplot() +
geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +
xlab(axis2[1, "label"]) +
ylab(axis2[2, "label"]) +
scale_x_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
scale_y_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
geom_mark_hull(data=sites1,
aes(x=axis1, y=axis2, colour=Pop,
fill=after_scale(alpha(colour, 0.2)),
label=Pop),
concavity=3, size=0.2, expand=unit(10, "mm"), show.legend=FALSE) +
geom_point(data=sites1,
aes(x=axis1, y=axis2, colour=Pop, shape=Pop),
alpha=1, size=3) +
geom_point(data=centroids.Subpop1,
aes(x=axis1c, y=axis2c),
shape=21, size=10) +
geom_point(data=centroids.Pop1,
aes(x=axis1c, y=axis2c, shape=Pop, colour=Pop),
alpha=0.9, size=8, show.legend=FALSE) +
geom_point(data=centroids.Pop_constraints,
aes(x=axis1c, y=axis2c, shape=Pop),
shape=21, colour="red", size=15, show.legend=FALSE) +
labs(colour = "Population", shape = "Population") +
BioR.theme +
ggsci::scale_colour_npg() +
coord_fixed(ratio=0.5)
plotgg1
This graph confirms that the fitted site scores (small symbols) correspond to the locations of the centroids (circles) of the Sub-populations. The large symbols with the larger red circles indicate the centroids for the populations. As it happens, the population centroids are also at the centre of the subpopulation centroids.
To calculated distances among the centroids, their scores on all constrained ordination axes need to be calculated (for the unconstrained axes, their scores are expected to be zero and are very close to zero in reality).
n.axes <- length(agc.cap2$CCA$eig)
Ind.scores <- scores(agc.cap2, choices=c(1:n.axes), display="sites", scaling=1)
axes.names <- names(Ind.scores)
Ind.names <- rownames(Ind.scores)
Ind.data <- data.frame(agc.env, Ind.scores)
Pop.centroids <- stats::aggregate(Ind.scores ~ Pop, data=Ind.data, FUN=mean)
Pop.names <- make.names(Pop.centroids[, 1])
rownames(Pop.centroids) <- Pop.names
Pop.centroids <- Pop.centroids[, -1]
Pop.centroids
## CAP1 CAP2 CAP3 CAP4 CAP5
## Athena -2.283306 -0.02061335 0.02125587 0.01771723 0.001922478
## Mt..Vernon 2.460897 0.02221662 -0.02290911 -0.01909523 -0.002072004
## CAP6 CAP7 CAP8 CAP9 CAP10
## Athena 0.02603368 -0.0003550685 0.01438731 -0.0004147741 -0.01529852
## Mt..Vernon -0.02805853 0.0003826850 -0.01550632 0.0004470343 0.01648841
## CAP11 CAP12 CAP13 CAP14 CAP15
## Athena -0.003240271 -0.005840210 -0.003244635 -0.005457971 -0.0003423148
## Mt..Vernon 0.003492292 0.006294448 0.003496996 0.005882480 0.0003689393
## CAP16 CAP17
## Athena -0.0005747724 -0.002965292
## Mt..Vernon 0.0006194769 0.003195926
Subpop.centroids <- stats::aggregate(Ind.scores ~ Pop_Subpop, data=Ind.data, FUN=mean)
Subpop.names <- make.names(Subpop.centroids[, 1])
rownames(Subpop.centroids) <- Subpop.names
Subpop.centroids <- Subpop.centroids[, -1]
Subpop.centroids
## CAP1 CAP2 CAP3 CAP4 CAP5
## Athena_1 -2.3610756 0.43316834 0.008401662 0.12116231 -0.34300755
## Athena_10 -2.3805971 -0.26526873 -0.666453689 0.01346035 -0.15354567
## Athena_2 -1.5197657 -1.73472454 0.981421615 0.41398245 0.10703032
## Athena_3 -2.3426396 1.25593724 0.481436951 -0.05733145 -0.52589126
## Athena_4 -2.3300962 1.00991127 0.250263829 -0.34129888 0.82604171
## Athena_5 -2.5150645 -0.50032773 -1.021810973 -0.02292575 0.03837031
## Athena_6 -2.4089852 -0.65951026 -0.452624197 -0.03444327 -0.14711825
## Athena_7 -2.3636843 -0.51754763 -0.271319659 0.08848206 0.13473941
## Athena_8 -2.3762712 1.11653196 0.501807787 -0.04324138 -0.26764493
## Athena_9 -2.4644534 -0.33205195 -0.040928596 0.03234799 -0.10976200
## Mt..Vernon_1 2.2906473 -0.60817710 0.269674188 -0.57315124 -0.04953234
## Mt..Vernon_2 0.8965574 -0.35660201 0.179110464 -0.60644251 -0.13064362
## Mt..Vernon_3 2.6871227 -0.10306236 -0.176564503 -0.34713376 0.14161141
## Mt..Vernon_4 2.8138478 0.17815313 -0.083484955 0.27927059 0.24050213
## Mt..Vernon_5 2.7621832 0.55904007 -0.078688468 0.80477927 0.01072851
## Mt..Vernon_6 1.3929761 0.08171758 -0.128723902 0.19830867 0.08599066
## Mt..Vernon_7 2.9416927 -0.17302869 0.135168135 -0.38753637 -0.17865798
## Mt..Vernon_8 2.9968138 0.03843016 -0.166039678 -0.33331506 -0.15358376
## CAP6 CAP7 CAP8 CAP9
## Athena_1 0.2451286149 -0.1516640429 -0.4359681418 0.389130950
## Athena_10 -0.0912559394 -0.0766225509 0.1486624648 -0.247903961
## Athena_2 0.1210674940 -0.1881971966 0.0897483153 -0.059356317
## Athena_3 -0.2965142329 -0.1167264952 0.4605223257 0.131278008
## Athena_4 0.0666888053 -0.1558820802 -0.0003731561 0.034060290
## Athena_5 -0.0071600269 -0.2863588453 -0.0214775720 -0.175527292
## Athena_6 -0.2227815340 -0.2142687540 -0.0190394278 0.239021519
## Athena_7 -0.0001015276 0.5717003301 0.0392989143 0.231749298
## Athena_8 0.1854480844 0.2867711607 -0.2791055380 -0.482002033
## Athena_9 0.1488831345 0.2824295357 0.0414496816 -0.048762473
## Mt..Vernon_1 -0.3513869518 0.0513777198 -0.0582469527 -0.063174975
## Mt..Vernon_2 -0.4996884081 -0.0716362151 -0.4173898730 -0.101486435
## Mt..Vernon_3 0.0291791390 0.1678073957 0.2768119638 -0.002883349
## Mt..Vernon_4 -0.1632641344 0.1443893172 0.0213811123 0.019372819
## Mt..Vernon_5 -0.0466014620 -0.0399367115 -0.0484452645 -0.065644964
## Mt..Vernon_6 -0.1692324713 -0.0867790941 -0.0668472516 0.099040143
## Mt..Vernon_7 0.1143705280 -0.0009636086 -0.1011541086 0.088450197
## Mt..Vernon_8 0.5510028628 -0.1071394387 0.1584428383 -0.004801944
## CAP10 CAP11 CAP12 CAP13 CAP14
## Athena_1 -0.235365720 -0.15366713 0.004411007 0.057223765 0.116735432
## Athena_10 -0.348584716 0.18467783 0.254193419 0.265267745 -0.018585003
## Athena_2 -0.005324045 -0.04378850 -0.029352702 0.045417241 0.005014648
## Athena_3 0.033702393 0.01025498 -0.125599433 0.002396002 0.012970783
## Athena_4 -0.038734563 0.08681497 0.022982312 -0.015167181 -0.008689027
## Athena_5 0.072668027 -0.17569095 -0.289451453 -0.086485785 0.044001655
## Athena_6 0.191981843 -0.06627190 0.268093603 -0.328953877 -0.364270478
## Athena_7 -0.013245240 0.11737996 -0.044466064 -0.042821694 0.020619984
## Athena_8 0.071339130 -0.21488228 0.072680115 -0.058422786 -0.091095800
## Athena_9 0.189040648 0.12170626 -0.012275118 -0.014089861 0.039357913
## Mt..Vernon_1 -0.298585277 -0.03554407 0.096945205 -0.288316240 0.098672680
## Mt..Vernon_2 0.308985482 0.32689526 -0.192946470 0.154683925 0.090686972
## Mt..Vernon_3 0.085481098 -0.40731111 0.094479438 0.106029051 0.146984074
## Mt..Vernon_4 -0.140153565 -0.13290968 -0.209650634 0.098957620 -0.143545901
## Mt..Vernon_5 -0.026157653 0.13097143 0.006944863 -0.110325505 0.047689414
## Mt..Vernon_6 0.313459696 -0.08714114 0.262540682 0.117918436 0.047381940
## Mt..Vernon_7 -0.045954000 0.02566635 -0.061276329 0.158407121 -0.169385703
## Mt..Vernon_8 0.065269392 0.15945874 -0.001425823 -0.092792961 0.015275220
## CAP15 CAP16 CAP17
## Athena_1 0.029308148 -0.0654238674 0.0103652387
## Athena_10 0.039447009 -0.0192504088 0.0012878821
## Athena_2 0.036172756 0.0190149289 -0.0041772430
## Athena_3 -0.002607214 0.0339421478 0.0094705275
## Athena_4 -0.022924076 -0.0053740353 -0.0202149816
## Athena_5 -0.057423567 0.0851973820 0.0003629668
## Athena_6 0.113234625 -0.1247811436 -0.0744212034
## Athena_7 0.082021860 0.1389572910 -0.0165788702
## Athena_8 0.066035819 0.0606186012 0.0099539243
## Athena_9 -0.218445335 -0.2032969715 0.0296917540
## Mt..Vernon_1 -0.076293568 0.0163510810 0.0548406752
## Mt..Vernon_2 0.179362930 -0.0804341688 -0.0438012114
## Mt..Vernon_3 0.075346695 -0.0970273186 -0.1475602864
## Mt..Vernon_4 0.053561043 -0.0974368191 0.1164740669
## Mt..Vernon_5 -0.016443468 -0.0003718248 -0.0806382455
## Mt..Vernon_6 -0.054637844 0.0980129885 0.1108600530
## Mt..Vernon_7 -0.159366868 0.0955995060 -0.0896061921
## Mt..Vernon_8 0.101696312 0.0002521994 0.0692793191
Next all the centroid positions are combined and a distance matrix is created.
origin <- Ind.scores[1, , drop=FALSE]
rownames(origin) <- "origin"
Scores.all <- rbind(Pop.centroids, Subpop.centroids, origin)
Scores.all[rownames(Scores.all) == "origin", ] <- 0
Distances.centroids <- data.frame(as.matrix(dist(Scores.all, method="euclidean")))
Dist.names <- rownames(Distances.centroids)
The distances of the populations to the origin of the graph are:
Distances.Pop <- Distances.centroids[rownames(Distances.centroids) %in% Pop.names, "origin"]
Distances.Pop
## [1] 2.283833 2.461465
This information now needs to be multiplied by population sizes:
# add information on population size
pop.sizes <- table(agc.env$Pop)
pop.sizes
##
## Athena Mt. Vernon
## 97 90
By multiplying population sizes with the squared distances of the population centroids to the origin (i.e., for each individual, add the squared distance of its population centroid to the origin to the Sum-of-Squares), we get the Sums-of-Squares between populations.
SumsofSqsAmongPopulations.CAP <- unname(pop.sizes[1]*(Distances.Pop[1])^2 + pop.sizes[2]*(Distances.Pop[2])^2)
SumsofSqsAmongPopulations.CAP
## [1] 1051.235
amova.data[rownames(amova.data) == "Between Pop ", "Sum Sq"] - SumsofSqsAmongPopulations.CAP
## [1] -1.591616e-12
In a similar pathway, the distances between population centroids and subpopulation centroids are calculated. First some information is added - and deleted - from the data.frame with the distances. This data.frame will also be used in the next section.
agc.env$Pop <- make.names(agc.env$Pop)
agc.env$Pop_Subpop <- make.names(agc.env$Pop_Subpop)
Distances.centroids$Name <- rownames(Distances.centroids)
Distances.centroids <- left_join(Distances.centroids,
unique(agc.env[, c("Pop_Subpop", "Pop")]),
by=c("Name" = "Pop_Subpop"))
Distances.centroids$Centroid.focal <- numeric(nrow(Distances.centroids))
for (i in 1:nrow(Distances.centroids)) {
if (is.na(Distances.centroids[i, "Pop"])== FALSE) {
Distances.centroids[i, "Centroid.focal"] <- Distances.centroids[i, which(names(Distances.centroids) == Distances.centroids[i, "Pop"])]
}
}
Scores.all <- rbind(Ind.scores, Pop.centroids, Subpop.centroids, origin)
Scores.all[rownames(Scores.all) == "origin", ] <- 0
Distances.all <- data.frame(as.matrix(dist(Scores.all, method="euclidean")))
Dist.names <- rownames(Distances.all)
Distances.all$Ind.name <- rownames(Distances.all)
Distances.all$Pop.name <- character(nrow(Distances.all))
Distances.all$Subpop.name <- character(nrow(Distances.all))
Distances.all$Subpop.focal <- numeric(nrow(Distances.all))
Distances.all[1:nrow(agc.env), "Pop.name"] <- agc.env$Pop
Distances.all[1:nrow(agc.env), "Subpop.name"] <- agc.env$Pop_Subpop
for (i in 1:nrow(agc.env)) {
Distances.all[i, "Subpop.focal"] <- Distances.all[i, which(names(Distances.all) == agc.env[i, "Pop_Subpop"])]
}
Distances.all <- left_join(Distances.all,
Distances.centroids[, c("Name", "Centroid.focal")],
by=c("Subpop.name" = "Name"))
Distances.all[is.na(Distances.all)] <- 0
SumsofSqsBetweenSamples.CAP <- sum((Distances.all$Centroid.focal)^2)
SumsofSqsBetweenSamples.CAP
## [1] 273.4575
amova.data[rownames(amova.data) == "Between samples Within Pop", "Sum Sq"] - SumsofSqsBetweenSamples.CAP
## [1] 1.705303e-13
From the caprescale output and the calculations above, the distances among site scores on the constrained ordination axes do NOT exactly reflect the Within samples Sum-of-Squares (instead, that Sum-of-Squares is expressed on the unconstrained axes) (and it was also shown in sections above that the centroids of these site scores reflect the between population and subpopulation Sums-of-SQuares). For a thorough discussion of these site scores see Legendre and Legendre 2012, for example on page 638).
However, these site scores do approximate the distances in the distance matrix. Points that are closer to each other in the ordination graph have in general a smaller pairwise distance - as it is the objective of ordination methods. This pattern can be shown via BiodiversityR::distdisplayed.
plot2 <- ordiplot(agc.cap2, display="sites")
distdisplayed(agc.dist, plot2, distx="euclidean")
## $gamanalysis
##
## Family: gaussian
## Link function: identity
##
## Formula:
## dist2 ~ s(dist1)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.62375 0.01608 412 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(dist1) 8.985 9 3974 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.673 Deviance explained = 67.3%
## GCV = 4.4985 Scale est. = 4.4959 n = 17391
##
## $mantelanalysis
##
## Mantel statistic based on Spearman's rank correlation rho
##
## Call:
## mantel(xdis = dist1, ydis = dist2, method = method, permutations = permutations)
##
## Mantel statistic r: 0.6752
## Significance: 0.009901
##
## Upper quantiles of permutations (null model):
## 90% 95% 97.5% 99%
## 0.0157 0.0240 0.0248 0.0257
## Permutation: free
## Number of permutations: 100
What also causes the range of y-values for a fixed x-value is the fact that the ordination graph only shows the distances among sites on the first two axes. This effect on the range of values can be shown by a principal coordinate analysis, which can be done with capscale by setting the explanatory variables to ‘1’.
agc.cap0 <- caprescale(capscale(agc.comm ~ 1, data = agc.env))
plot3 <- ordiplot(agc.cap0, display="sites")
distdisplayed(agc.dist, plot3, distx="euclidean")
## $gamanalysis
##
## Family: gaussian
## Link function: identity
##
## Formula:
## dist2 ~ s(dist1)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.62211 0.01119 502.5 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(dist1) 8.988 9 6305 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.765 Deviance explained = 76.6%
## GCV = 2.178 Scale est. = 2.1767 n = 17391
##
## $mantelanalysis
##
## Mantel statistic based on Spearman's rank correlation rho
##
## Call:
## mantel(xdis = dist1, ydis = dist2, method = method, permutations = permutations)
##
## Mantel statistic r: 0.788
## Significance: 0.009901
##
## Upper quantiles of permutations (null model):
## 90% 95% 97.5% 99%
## 0.0132 0.0169 0.0188 0.0244
## Permutation: free
## Number of permutations: 100
In section 7.4, a data.frame was created that also includes the distance of each individual to its subpopulation centroid. These calculations are based on all the constrained ordination axes.
Distances.ind <- Distances.all[which(Distances.all$Ind.name %in% Ind.names), ]
Subpop.names2 <- c(Subpop.names, "Ind.name", "Subpop.name", "Subpop.focal")
Distances.ind <- Distances.ind[, which(names(Distances.ind) %in% Subpop.names2)]
Distances.ind[c(1:30), c("Ind.name", "Subpop.name", "Subpop.focal")]
## Ind.name Subpop.name Subpop.focal
## 1 A1.1 Athena_1 1.6182809
## 2 A1.2 Athena_1 1.4283553
## 3 A1.3 Athena_1 0.9989061
## 4 A1.4 Athena_1 1.1433385
## 5 A1.5 Athena_1 1.5778902
## 6 A1.6 Athena_1 1.6423199
## 7 A1.7 Athena_1 1.4200972
## 8 A1.8 Athena_1 0.9989061
## 9 A1.9 Athena_1 1.1433385
## 10 A2.1 Athena_2 2.4042656
## 11 A2.2 Athena_2 2.6964285
## 12 A2.3 Athena_2 1.3233750
## 13 A2.4 Athena_2 1.1521599
## 14 A2.5 Athena_2 1.4702261
## 15 A2.6 Athena_2 1.5353748
## 16 A2.7 Athena_2 1.3964384
## 17 A2.8 Athena_2 1.0594455
## 18 A2.9 Athena_2 1.2903032
## 19 A2.10 Athena_2 1.3849269
## 20 A2.11 Athena_2 1.5629585
## 21 A2.12 Athena_2 1.0135055
## 22 A3.1 Athena_3 0.1360980
## 23 A3.2 Athena_3 0.1360980
## 24 A3.3 Athena_3 0.1360980
## 25 A3.4 Athena_3 0.1360980
## 26 A3.5 Athena_3 1.2248823
## 27 A3.6 Athena_3 0.1360980
## 28 A3.7 Athena_3 0.1360980
## 29 A3.8 Athena_3 0.1360980
## 30 A3.9 Athena_3 0.1360980
Now we check for the name of the subpopulation to which an individual has the smallest distance.
Distances.ind$closest.centroid.index <- apply(Distances.ind[, which(names(Distances.ind) %in% Subpop.names)], MARGIN=1, FUN=which.min)
Distances.ind$closest.centroid.name <- Subpop.names[Distances.ind$closest.centroid.index]
Distances.ind[c(1:30), c("Ind.name", "Subpop.name", "Subpop.focal", "closest.centroid.name")]
## Ind.name Subpop.name Subpop.focal closest.centroid.name
## 1 A1.1 Athena_1 1.6182809 Athena_6
## 2 A1.2 Athena_1 1.4283553 Athena_6
## 3 A1.3 Athena_1 0.9989061 Athena_1
## 4 A1.4 Athena_1 1.1433385 Athena_1
## 5 A1.5 Athena_1 1.5778902 Athena_9
## 6 A1.6 Athena_1 1.6423199 Athena_6
## 7 A1.7 Athena_1 1.4200972 Athena_1
## 8 A1.8 Athena_1 0.9989061 Athena_1
## 9 A1.9 Athena_1 1.1433385 Athena_1
## 10 A2.1 Athena_2 2.4042656 Athena_7
## 11 A2.2 Athena_2 2.6964285 Athena_8
## 12 A2.3 Athena_2 1.3233750 Athena_2
## 13 A2.4 Athena_2 1.1521599 Athena_2
## 14 A2.5 Athena_2 1.4702261 Athena_2
## 15 A2.6 Athena_2 1.5353748 Athena_2
## 16 A2.7 Athena_2 1.3964384 Athena_2
## 17 A2.8 Athena_2 1.0594455 Athena_2
## 18 A2.9 Athena_2 1.2903032 Athena_2
## 19 A2.10 Athena_2 1.3849269 Athena_2
## 20 A2.11 Athena_2 1.5629585 Athena_2
## 21 A2.12 Athena_2 1.0135055 Athena_2
## 22 A3.1 Athena_3 0.1360980 Athena_3
## 23 A3.2 Athena_3 0.1360980 Athena_3
## 24 A3.3 Athena_3 0.1360980 Athena_3
## 25 A3.4 Athena_3 0.1360980 Athena_3
## 26 A3.5 Athena_3 1.2248823 Athena_3
## 27 A3.6 Athena_3 0.1360980 Athena_3
## 28 A3.7 Athena_3 0.1360980 Athena_3
## 29 A3.8 Athena_3 0.1360980 Athena_3
## 30 A3.9 Athena_3 0.1360980 Athena_3
Now the individuals can be flagged that are closest to a subpopulation centroid that is not their own
Distances.ind$migrant.check <- Distances.ind$Subpop.name != Distances.ind$closest.centroid.name
Distances.ind[Distances.ind$migrant.check == TRUE, c("Ind.name", "Subpop.name", "Subpop.focal", "closest.centroid.name")]
## Ind.name Subpop.name Subpop.focal closest.centroid.name
## 1 A1.1 Athena_1 1.6182809 Athena_6
## 2 A1.2 Athena_1 1.4283553 Athena_6
## 5 A1.5 Athena_1 1.5778902 Athena_9
## 6 A1.6 Athena_1 1.6423199 Athena_6
## 10 A2.1 Athena_2 2.4042656 Athena_7
## 11 A2.2 Athena_2 2.6964285 Athena_8
## 32 A4.1 Athena_4 2.0396259 Athena_10
## 38 A4.7 Athena_4 2.2427190 Athena_6
## 42 A4.11 Athena_4 2.3913135 Athena_6
## 43 A4.12 Athena_4 1.8846889 Athena_10
## 49 A5.5 Athena_5 2.1734550 Athena_7
## 52 A5.8 Athena_5 1.2084516 Athena_6
## 54 A5.10 Athena_5 1.3988773 Athena_6
## 60 A7.1 Athena_7 1.3869500 Athena_9
## 63 A7.4 Athena_7 1.1258154 Athena_9
## 67 A7.8 Athena_7 1.6393342 Athena_10
## 68 A7.9 Athena_7 1.9291106 Athena_1
## 69 A7.10 Athena_7 1.9378740 Athena_1
## 70 A7.11 Athena_7 1.6393342 Athena_10
## 71 A8.1 Athena_8 1.7119174 Athena_9
## 78 A8.8 Athena_8 2.1277532 Athena_10
## 79 A9.1 Athena_9 2.1991091 Athena_8
## 80 A9.2 Athena_9 2.3632018 Athena_8
## 81 A9.3 Athena_9 0.9843777 Athena_7
## 82 A9.4 Athena_9 1.5326315 Athena_5
## 83 A9.5 Athena_9 1.9195120 Athena_2
## 85 A9.7 Athena_9 2.1875622 Athena_2
## 86 A9.8 Athena_9 1.3492054 Athena_6
## 90 A10.2 Athena_10 1.1533330 Athena_9
## 96 A10.8 Athena_10 1.7568162 Athena_5
## 98 M1.1 Mt..Vernon_1 1.3385447 Mt..Vernon_7
## 99 M1.2 Mt..Vernon_1 1.2364507 Mt..Vernon_3
## 100 M1.3 Mt..Vernon_1 1.1992236 Mt..Vernon_3
## 102 M1.5 Mt..Vernon_1 2.2549934 Mt..Vernon_2
## 104 M1.7 Mt..Vernon_1 1.4887327 Mt..Vernon_7
## 106 M1.9 Mt..Vernon_1 2.9176861 Mt..Vernon_2
## 107 M1.10 Mt..Vernon_1 1.4334119 Mt..Vernon_3
## 108 M2.1 Mt..Vernon_2 2.5384165 Mt..Vernon_3
## 109 M2.2 Mt..Vernon_2 3.8894555 Athena_5
## 112 M2.5 Mt..Vernon_2 2.2054485 Mt..Vernon_1
## 113 M2.6 Mt..Vernon_2 3.7066902 Athena_6
## 118 M3.5 Mt..Vernon_3 1.4410735 Mt..Vernon_7
## 122 M4.1 Mt..Vernon_4 0.9274832 Mt..Vernon_3
## 123 M4.2 Mt..Vernon_4 1.1828363 Mt..Vernon_7
## 124 M4.3 Mt..Vernon_4 1.1632311 Mt..Vernon_5
## 126 M4.5 Mt..Vernon_4 1.1632311 Mt..Vernon_5
## 127 M4.6 Mt..Vernon_4 1.1632311 Mt..Vernon_5
## 128 M4.7 Mt..Vernon_4 1.6074377 Mt..Vernon_7
## 129 M4.8 Mt..Vernon_4 1.0558442 Mt..Vernon_3
## 130 M4.9 Mt..Vernon_4 0.9604548 Mt..Vernon_7
## 131 M4.10 Mt..Vernon_4 1.1632311 Mt..Vernon_5
## 132 M4.11 Mt..Vernon_4 1.8893947 Mt..Vernon_7
## 133 M4.12 Mt..Vernon_4 1.1632311 Mt..Vernon_5
## 136 M5.3 Mt..Vernon_5 1.7867565 Mt..Vernon_7
## 143 M5.10 Mt..Vernon_5 1.6620569 Mt..Vernon_8
## 150 M5.17 Mt..Vernon_5 1.9435317 Mt..Vernon_7
## 151 M6.1 Mt..Vernon_6 4.1935953 Athena_7
## 152 M6.2 Mt..Vernon_6 1.6371858 Mt..Vernon_5
## 153 M6.3 Mt..Vernon_6 1.6966716 Mt..Vernon_5
## 154 M6.4 Mt..Vernon_6 1.6371858 Mt..Vernon_5
## 155 M6.5 Mt..Vernon_6 3.8629632 Athena_6
## 156 M6.6 Mt..Vernon_6 1.6664233 Mt..Vernon_5
## 157 M6.7 Mt..Vernon_6 1.9642536 Mt..Vernon_7
## 158 M6.8 Mt..Vernon_6 2.4323315 Mt..Vernon_7
## 159 M6.9 Mt..Vernon_6 4.2313307 Athena_6
## 160 M6.10 Mt..Vernon_6 1.8711618 Mt..Vernon_7
## 161 M6.11 Mt..Vernon_6 1.7862796 Mt..Vernon_3
## 162 M6.12 Mt..Vernon_6 1.5991840 Mt..Vernon_4
## 164 M7.2 Mt..Vernon_7 0.7722281 Mt..Vernon_3
## 167 M7.5 Mt..Vernon_7 0.7722281 Mt..Vernon_3
## 172 M7.10 Mt..Vernon_7 0.7722281 Mt..Vernon_3
## 174 M7.12 Mt..Vernon_7 1.4441787 Mt..Vernon_8
## 175 M8.1 Mt..Vernon_8 0.8804644 Mt..Vernon_4
## 176 M8.2 Mt..Vernon_8 1.3619118 Mt..Vernon_7
## 177 M8.3 Mt..Vernon_8 1.0214315 Mt..Vernon_7
## 182 M8.8 Mt..Vernon_8 0.8537276 Mt..Vernon_7
## 183 M8.9 Mt..Vernon_8 0.6793190 Mt..Vernon_3
The table shows many individuals that are closest to the centroid of another subpopulation. The following individuals are closest to a subpopulation of another population:
Distances.ind <- left_join(Distances.ind,
unique(agc.env[, c("Pop_Subpop", "Pop")]),
by = c("Subpop.name" = "Pop_Subpop"))
names(Distances.ind)[which(names(Distances.ind) == "Pop")] <- "Pop.name"
Distances.ind <- left_join(Distances.ind,
unique(agc.env[, c("Pop_Subpop", "Pop")]),
by = c("closest.centroid.name" = "Pop_Subpop"))
Distances.ind$migrant.CHECK <- Distances.ind$Pop.name != Distances.ind$Pop
t(Distances.ind[Distances.ind$migrant.CHECK == TRUE, ])
## 109 113 151
## Athena_1 "1.977050" "2.203123" "2.238959"
## Athena_10 "1.097736" "1.866381" "2.021636"
## Athena_2 "2.876404" "2.808093" "3.080425"
## Athena_3 "2.604926" "2.568527" "2.422319"
## Athena_4 "2.270604" "2.252781" "2.261538"
## Athena_5 "0.7318756" "1.6575073" "2.1018160"
## Athena_6 "1.181373" "1.344370" "1.969170"
## Athena_7 "1.425368" "1.707656" "1.596880"
## Athena_8 "2.543254" "2.565118" "2.366459"
## Athena_9 "1.579760" "1.705090" "1.666623"
## Mt..Vernon_1 "5.138642" "5.081076" "5.223756"
## Mt..Vernon_2 "3.889456" "3.706690" "3.959358"
## Mt..Vernon_3 "5.442829" "5.360604" "5.523258"
## Mt..Vernon_4 "5.586698" "5.566826" "5.620782"
## Mt..Vernon_5 "5.663573" "5.629187" "5.606125"
## Mt..Vernon_6 "4.227401" "4.080352" "4.193595"
## Mt..Vernon_7 "5.768938" "5.706049" "5.844881"
## Mt..Vernon_8 "5.788329" "5.741306" "5.864021"
## Ind.name "M2.2" "M2.6" "M6.1"
## Subpop.name "Mt..Vernon_2" "Mt..Vernon_2" "Mt..Vernon_6"
## Subpop.focal "3.889456" "3.706690" "4.193595"
## closest.centroid.index "6" "7" "8"
## closest.centroid.name "Athena_5" "Athena_6" "Athena_7"
## migrant.check "TRUE" "TRUE" "TRUE"
## Pop.name "Mt..Vernon" "Mt..Vernon" "Mt..Vernon"
## Pop "Athena" "Athena" "Athena"
## migrant.CHECK "TRUE" "TRUE" "TRUE"
## 155 159
## Athena_1 "2.111941" "2.067680"
## Athena_10 "1.693429" "1.704386"
## Athena_2 "2.547926" "2.733789"
## Athena_3 "2.829112" "2.453341"
## Athena_4 "2.493998" "2.120513"
## Athena_5 "1.5342338" "1.4727124"
## Athena_6 "1.346801" "1.108549"
## Athena_7 "1.751596" "1.528935"
## Athena_8 "2.817157" "2.449772"
## Athena_9 "1.874533" "1.526067"
## Mt..Vernon_1 "4.860440" "5.199892"
## Mt..Vernon_2 "3.607321" "3.850684"
## Mt..Vernon_3 "5.180987" "5.491596"
## Mt..Vernon_4 "5.347532" "5.678419"
## Mt..Vernon_5 "5.412420" "5.754068"
## Mt..Vernon_6 "3.862963" "4.231331"
## Mt..Vernon_7 "5.492316" "5.829284"
## Mt..Vernon_8 "5.528937" "5.863800"
## Ind.name "M6.5" "M6.9"
## Subpop.name "Mt..Vernon_6" "Mt..Vernon_6"
## Subpop.focal "3.862963" "4.231331"
## closest.centroid.index "7" "7"
## closest.centroid.name "Athena_6" "Athena_6"
## migrant.check "TRUE" "TRUE"
## Pop.name "Mt..Vernon" "Mt..Vernon"
## Pop "Athena" "Athena"
## migrant.CHECK "TRUE" "TRUE"
It is possible now to label these individuals in the ordination graph
agc.cap <- caprescale(capscale(agc.comm ~ Pop + Pop_Subpop, data = agc.env))
plot1a <- ordiplot(agc.cap, choices=c(1, 2))
attach(agc.env)
sites1 <- sites.long(plot1a, env.data=agc.env)
axis1 <- axis.long(agc.cap, choices=c(1, 2))
centroids.Subpop1 <- centroids.long(sites1, grouping=Pop_Subpop, centroids.only=TRUE)
centroids.Subpop2 <- centroids.long(sites1, grouping=Pop_Subpop, centroids.only=FALSE)
centroids.Pop1 <- centroids.long(sites1, grouping=Pop, centroids.only=TRUE)
centroids.Pop2 <- centroids.long(sites1, grouping=Pop, centroids.only=FALSE)
Labels are added for the individuals that were flagged.
Distances.ind$migrant.label <- rep("", nrow(Distances.ind))
for (i in 1:nrow(Distances.ind)) {
if (Distances.ind[i, "migrant.CHECK"] == TRUE) {
Distances.ind[i, "migrant.label"] <- Distances.ind[i, "Ind.name"]
}
}
sites1 <- left_join(sites1,
Distances.ind[, c("Ind.name", "migrant.label")],
by = c("labels" = "Ind.name"))
Now the plot is made
plotgg2 <- ggplot() +
geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +
xlab(axis1[1, "label"]) +
ylab(axis1[2, "label"]) +
scale_x_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
scale_y_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
geom_segment(data=centroids.Subpop2,
aes(x=axis1c, y=axis2c, xend=axis1, yend=axis2, colour=Pop_Subpop),
size=0.4, show.legend=TRUE) +
geom_point(data=subset(sites1, migrant.label != ""),
aes(x=axis1, y=axis2),
shape="square", colour="black", alpha=0.7, size=2) +
geom_label_repel(data=sites1,
aes(x=axis1, y=axis2, label=migrant.label),
colour="black", alpha=0.7, size=2, show.legend=FALSE) +
guides(colour = guide_legend(ncol=2)) +
BioR.theme +
labs(colour = "Subpopulation") +
theme(legend.text = element_text(size = 8)) +
scale_colour_manual(values=colorspace::rainbow_hcl(length(unique(Pop_Subpop)), c=90, l=50)) +
coord_fixed(ratio=0.25)
plotgg2
Again we can have a closer look:
plotgg2 + coord_fixed(ratio=0.3, xlim=c(-4, -2), ylim=c(-5, 1))
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
Whether these flagged individuals indeed are recent migrants needs to be verified. Another possibility is that these were mislabelled during the research process. But it would be interesting to check if alternative procedures such as available through the STRUCTURE Bayesian software also flag these individuals.
Below is a an example of AMOVA with the nancycats data from adegenet (microsatellites of 237 stray cats from 17 colonies in Nancy, France) and the poppr::diss.dist.
Note that both the poppr::amova and vegan::capscale procedures below required a correction method (in the below, the method developed by Lingoes) to avoid negative eigenvalues (see also Legendre and Legendre 2012 page 502).
** AMOVA with another distance measure in poppr
# Load the nancycats dataset and construct the repeat vector.
data(nancycats)
nancycats
## /// GENIND OBJECT /////////
##
## // 237 individuals; 9 loci; 108 alleles; size: 150.5 Kb
##
## // Basic content
## @tab: 237 x 108 matrix of allele counts
## @loc.n.all: number of alleles per locus (range: 8-18)
## @loc.fac: locus factor for the 108 columns of @tab
## @all.names: list of allele names for each locus
## @ploidy: ploidy of each individual (range: 2-2)
## @type: codom
## @call: genind(tab = truenames(nancycats)$tab, pop = truenames(nancycats)$pop)
##
## // Optional content
## @pop: population of each individual (group size range: 9-23)
## @other: a list containing: xy
names(alleles(nancycats)) <- locNames(nancycats) # small bug in this data set
# avoid removal of these loci internally in poppr.amova
nancycats2 <- missingno(nancycats, type = "loci", cutoff=0)
##
## Found 617 missing values.
##
## 3 loci contained missing values greater than 0%
##
## Removing 3 loci: fca8, fca45, fca96
cat.dist <- diss.dist(nancycats2, percent=TRUE)
strata(nancycats2) <- data.frame(Pop=nancycats2@pop)
poppr.amova(nancycats2, ~Pop, within=FALSE, dist=cat.dist, squared=FALSE, correction="lingoes")
##
## No missing values detected.
## Lingoes constant = 0.581114
## $call
## ade4::amova(samples = xtab, distances = xdist, structures = xstruct)
##
## $results
## Df Sum Sq Mean Sq
## Between samples 16 23.99833 1.4998956
## Within samples 220 172.41612 0.7837097
## Total 236 196.41445 0.8322646
##
## $componentsofcovariance
## Sigma %
## Variations Between samples 0.05165038 6.183008
## Variations Within samples 0.78370966 93.816992
## Total variations 0.83536004 100.000000
##
## $statphi
## Phi
## Phi-samples-total 0.06183008
# amova.data2
Instead of working with a community data set, the distance matrix will be used as response in a capscale model. The distance matrix is already available, but the environmental data set needs to be created.
cat.env <- data.frame(Pop = nancycats@pop)
str(cat.env)
## 'data.frame': 237 obs. of 1 variable:
## $ Pop: Factor w/ 17 levels "P01","P02","P03",..: 1 1 1 1 1 1 1 1 1 1 ...
cat.cap <- capscale(cat.dist ~ Pop, data = cat.env, add='lingoes')
cat.cap
## Call: capscale(formula = cat.dist ~ Pop, data = cat.env, add =
## "lingoes")
##
## Inertia Proportion Rank
## Total 196.4145 1.0000
## Constrained 23.9983 0.1222 16
## Unconstrained 172.4161 0.8778 220
## Inertia is Lingoes adjusted squared Unknown distance
##
## Eigenvalues for constrained axes:
## CAP1 CAP2 CAP3 CAP4 CAP5 CAP6 CAP7 CAP8 CAP9 CAP10 CAP11 CAP12 CAP13
## 3.732 2.731 2.484 1.986 1.744 1.637 1.563 1.365 1.280 1.119 0.998 0.952 0.715
## CAP14 CAP15 CAP16
## 0.658 0.566 0.469
##
## Eigenvalues for unconstrained axes:
## MDS1 MDS2 MDS3 MDS4 MDS5 MDS6 MDS7 MDS8
## 6.132 5.697 5.337 4.912 4.498 4.258 3.737 3.613
## (Showing 8 of 220 unconstrained eigenvalues)
##
## Constant added to distances: 0.5811136
cat.cap2 <- caprescale(cat.cap, verbose=TRUE)
## SSTot obtained from sum of all eigenvalues: 196.4145
## SSTot obtained from sum of all positive eigenvalues: 196.4145
## SSTot reflected by distances among site scores on all axes: 219.5196
## SSExpl obtained from eigenvalues of constrained axes: 23.99833
## SSExpl reflected by distances among site scores on constrained axes (scaling 1): 47.10343
## SSExpl reflected by distances among fitted site scores on constrained axes (scaling 1): 23.99833
## SSRes obtained from eigenvalues of positive unconstrained axes: 172.4161
## SSRes reflected by distances among site scores on positive unconstrained axes (scaling 1): 172.4161
plot3 <- ordiplot(cat.cap2, choices=c(1, 2))
Extract the scores (again based on these scripts).
attach(cat.env)
sites1 <- sites.long(plot3, env.data=cat.env)
axis1 <- axis.long(cat.cap2, choices=c(1, 2))
centroids.Pop1 <- centroids.long(sites1, grouping=Pop, centroids.only=TRUE)
centroids.Pop2 <- centroids.long(sites1, grouping=Pop, centroids.only=FALSE)
plotgg3 <- ggplot() +
geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +
xlab(axis1[1, "label"]) +
ylab(axis1[2, "label"]) +
scale_x_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
scale_y_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
geom_segment(data=centroids.Pop2,
aes(x=axis1c, y=axis2c, xend=axis1, yend=axis2, colour=Pop),
size=0.7, show.legend=TRUE) +
geom_point(data=centroids.Pop1,
aes(x=axis1c, y=axis2c),
shape="square", colour="black", alpha=0.7, size=2) +
labs(colour = "Population") +
guides(colour = guide_legend(ncol=2)) +
BioR.theme +
theme( legend.text = element_text(size = 8)) +
scale_colour_manual(values=colorspace::rainbow_hcl(length(levels(Pop)), c=90, l=50)) +
coord_fixed(ratio=1)
plotgg3
An alternative plot with ellipses.
plot2 <- ordiplot(cat.cap2, choices=c(1,2))
Pop.ellipses <- ordiellipse(plot2, groups=Pop, display="sites", kind="se")
Pop.ellipses.long2 <- ordiellipse.long(Pop.ellipses, grouping.name="Pop")
plotgg3 <- ggplot() +
geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +
xlab(axis1[1, "label"]) +
ylab(axis1[2, "label"]) +
scale_x_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
scale_y_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
geom_polygon(data=Pop.ellipses.long2,
aes(x=axis1, y=axis2, colour=Pop,
fill=after_scale(alpha(colour, 0.2))),
size=0.2, show.legend=TRUE) +
geom_segment(data=centroids.Pop2,
aes(x=axis1c, y=axis2c, xend=axis1, yend=axis2, colour=Pop),
size=0.7, show.legend=FALSE) +
geom_point(data=centroids.Pop1,
aes(x=axis1c, y=axis2c),
shape="square", colour="black", alpha=0.7, size=2) +
geom_label_repel(data=centroids.Pop1,
aes(x=axis1c, y=axis2c, label=Pop, colour=Pop),
alpha=1.0, size=2, show.legend=FALSE) +
labs(colour = "Population") +
guides(colour = guide_legend(ncol=2)) +
BioR.theme +
theme(legend.text = element_text(size = 8)) +
scale_colour_manual(values=colorspace::rainbow_hcl(length(levels(Pop)), c=90, l=50)) +
coord_fixed(ratio=1)
plotgg3
sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 18363)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United Kingdom.1252
## [2] LC_CTYPE=English_United Kingdom.1252
## [3] LC_MONETARY=English_United Kingdom.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United Kingdom.1252
##
## attached base packages:
## [1] tcltk stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] ggrepel_0.8.2 dplyr_1.0.2 ggforce_0.3.2
## [4] ggsci_2.9 ggplot2_3.3.2 poppr_2.8.6
## [7] adegenet_2.1.3 ade4_1.7-16 BiodiversityR_2.12-3
## [10] vegan_2.5-6 lattice_0.20-41 permute_0.9-5
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.1.7 Hmisc_4.4-0
## [4] fastmatch_1.1-0 plyr_1.8.6 igraph_1.2.6
## [7] sp_1.4-2 splines_4.0.2 digest_0.6.25
## [10] htmltools_0.5.0 gdata_2.18.0 relimp_1.0-5
## [13] magrittr_1.5 checkmate_2.0.0 cluster_2.1.0
## [16] openxlsx_4.1.5 tcltk2_1.2-11 gmodels_2.18.1
## [19] sandwich_2.5-1 prettyunits_1.1.1 jpeg_0.1-8.1
## [22] colorspace_1.4-1 mitools_2.4 haven_2.3.1
## [25] xfun_0.15 jsonlite_1.6.1 crayon_1.3.4
## [28] lme4_1.1-23 survival_3.1-12 zoo_1.8-8
## [31] phangorn_2.5.5 ape_5.4 glue_1.4.1
## [34] polyclip_1.10-0 gtable_0.3.0 seqinr_4.2-4
## [37] V8_3.4.0 polysat_1.7-4 car_3.0-8
## [40] abind_1.4-5 scales_1.1.1 DBI_1.1.0
## [43] Rcpp_1.0.4.6 xtable_1.8-4 progress_1.2.2
## [46] spData_0.3.8 htmlTable_2.0.0 units_0.6-7
## [49] foreign_0.8-80 spdep_1.1-5 Formula_1.2-3
## [52] survey_4.0 htmlwidgets_1.5.1 RColorBrewer_1.1-2
## [55] acepack_1.4.1 ellipsis_0.3.1 pkgconfig_2.0.3
## [58] farver_2.0.3 nnet_7.3-14 deldir_0.2-3
## [61] labeling_0.3 tidyselect_1.1.0 rlang_0.4.8
## [64] reshape2_1.4.4 later_1.1.0.1 munsell_0.5.0
## [67] cellranger_1.1.0 tools_4.0.2 generics_0.1.0
## [70] evaluate_0.14 stringr_1.4.0 fastmap_1.0.1
## [73] yaml_2.2.1 knitr_1.28 zip_2.0.4
## [76] purrr_0.3.4 nlme_3.1-148 mime_0.9
## [79] concaveman_1.1.0 compiler_4.0.2 rstudioapi_0.11
## [82] curl_4.3 png_0.1-7 e1071_1.7-3
## [85] tibble_3.0.1 statmod_1.4.34 tweenr_1.0.1
## [88] stringi_1.4.6 forcats_0.5.0 Matrix_1.2-18
## [91] classInt_0.4-3 nloptr_1.2.2.1 vctrs_0.3.4
## [94] effects_4.1-4 RcmdrMisc_2.7-0 pillar_1.4.4
## [97] LearnBayes_2.15.1 lifecycle_0.2.0 data.table_1.12.8
## [100] raster_3.4-5 httpuv_1.5.4 R6_2.4.1
## [103] latticeExtra_0.6-29 promises_1.1.1 KernSmooth_2.23-17
## [106] gridExtra_2.3 rio_0.5.16 codetools_0.2-16
## [109] boot_1.3-25 MASS_7.3-51.6 gtools_3.8.2
## [112] withr_2.2.0 nortest_1.0-4 Rcmdr_2.6-2
## [115] pegas_0.14 mgcv_1.8-31 expm_0.999-5
## [118] parallel_4.0.2 hms_0.5.3 quadprog_1.5-8
## [121] grid_4.0.2 rpart_4.1-15 coda_0.19-3
## [124] class_7.3-17 minqa_1.2.4 rmarkdown_2.3
## [127] carData_3.0-4 sf_0.9-6 shiny_1.4.0.2
## [130] base64enc_0.1-3