Schefferville transitions - constrained clustering

Tammy L. Elliott

March 4, 2015

R version 2.12 and 3.01 (vers. 2012 for constrained.clust and mvpart)

Data preparation

#add 0 values for 11 species to large grid community matrix
abd.sp.allsp<-cbind(abd.sp, sp.zero.com.matrix.zeros)

#again, find difference in what is in phylogeny and plots
xx<-names(abd.sp.allsp)
yy<-trans.one.tree$tip.label
xxyy<-setdiff(xx,yy)
yyxx<-setdiff(yy,xx)
xxyy

## character(0)

yyxx

## [1] "ComUmb" "CorAlt" "CorSto" "HydArb" "TheHum" "XimAme"

#Calculate pairwise phylogenetic distance for matrix
#phy.dist<-cophenetic(abd.sp.allsp, trans.one.tree)

Calculate environmental distances between different variables

Gower dissimilarity for mixed variables. ‘gowdist’ implements Podani’s (1999) extension to ordinal variables.

gowdis converts the Gower (1971) similarity coefficient to a dissimilarity coefficient.

#Truncate site data with different environmental variables
site.lat<-site[,4,drop=FALSE]
site.lon<-site[,5,drop=FALSE]
site.elev<-site[,6,drop=FALSE]
site.vis<-site[,10,drop=FALSE]
site.slope<-site[,8,drop=FALSE]
site.soil.dep.ind<-site[,c(14:17,drop=FALSE)]
site.soil<-apply(site.soil.dep.ind,1,mean)


#Calculate soil moist
soil.moist<-site$Soil.moisture
soil.moist.hydric<-gsub("hydric", "1", soil.moist)
soil.moist.mesic.hydric<-gsub("mesic-1", "2", soil.moist.hydric)
soil.moist.mesic<-gsub("mesic", "3", soil.moist.mesic.hydric)
soil.moist.mesic.xeric<-gsub("xeric-3", "4", soil.moist.mesic)
soil.moist.xeric<-gsub("xeric", "5", soil.moist.mesic.xeric)

site.moist<-as.numeric(soil.moist.xeric)

#Create matrix of variables 
site.env<-cbind(site.elev,site.vis, site.slope,site.soil, site.moist)
head(site.env)

##     Elevation..m. Sky.visible.... Slope..Degrees. site.soil site.moist
## EA1           523              95               0    22.250          2
## EB1           522             100               0    22.500          2
## EC1           524             100               0    14.500          2
## ED1           532              80               0    18.125          2
## EE1           522             100               0    26.375          1
## EF1           531              80               0    30.000          2

#Calculate Gower's distance; the results are a dissimilarity matrix
site.env.dist<-gowdis(site.env)

K-means clustering of environmental distances

From Chapter 8 - Cluster analysis of Legendre and Legendre (2012)

The objective function of K-means is that the partition should minimize the total error sum of squares. The major problem with K-means is that the solution on which the computation eventually converges depends to some extent on the initial positions of the centroids. This is the ‘local minimum’ problem in algorithms.

Algorithm minimizes within-group sum of squares.

Principal Coordinates Analysis is suitable if one wishes to ordinate objects on the basis of another distance measure, more appropriate to the problem at hand. It provides a Euclidean representation of a set of objects whose relationships are measured by and similarity or distance measure chosen by the user.

For the following PCoA plots, PCoA is run on a Euclidean distance matrix compute on a Hellinger-transformed species abundance matrix; the result of these two operations is a Hellinger distance matrix (Boccard 2011)

ccas.env<-cascadeKM((site.env.dist),2,15)
plot(ccas.env,sortq=TRUE)

#Try K-means clustering with 3,5 and 6-10 clusters

vasc.env.clust.3<-kmeans(site.env.dist,3)
vasc.env.clust.5<-kmeans(site.env.dist,5)
vasc.env.clust.6<-kmeans(site.env.dist,6)
vasc.env.clust.7<-kmeans(site.env.dist,7)
vasc.env.clust.8<-kmeans(site.env.dist,8)
vasc.env.clust.9<-kmeans(site.env.dist,9)
vasc.env.clust.10<-kmeans(site.env.dist,10)


#Plot PCoA with 3 groupings based on k-means clustering for environmental distances
#dev.new(width=11.8, height=8)
par(mfrow=c(1,2))
vasc.env.3<-cmdscale(site.env.dist, k = 54, eig = FALSE, add = FALSE, x.ret = FALSE)
x<-vasc.env.3[,1]
y<-vasc.env.3[,2]
plot(vasc.env.3, type="n", xlab="", ylab="", main="Principal Coordinate Analysis (PCoA) with three \nclusters based on K-means clustering on \npairwise environmental distances", cex=1, pch=16, col="black")
ordispider(vasc.env.3, vasc.env.clust.3$cluster, col="grey40", label=TRUE)
ordiellipse(vasc.env.3, vasc.env.clust.3$cluster, col="grey70")
box(lwd=2)
text(x, y, rownames(vasc.env.3), cex=0.4)

vasc.env.3<-cmdscale(site.env.dist, k = 54, eig = FALSE, add = FALSE, x.ret = FALSE)
x<-vasc.env.3[,1]
y<-vasc.env.3[,2]
plot(vasc.env.3, type="n", xlab="", ylab="", main="Principal Coordinate Analysis (PCoA) with three \nclusters based on K-means clustering on \npairwise environmental distances", cex=1, pch=16, col="black")
ordispider(vasc.env.3, vasc.env.clust.3$cluster, col="grey40", label=TRUE)
ordiellipse(vasc.env.3, vasc.env.clust.3$cluster, col="grey70")
box(lwd=2)

#Plot PCoA with 5 groupings based on k-means clustering for environmental distances
#dev.new(width=11.8, height=8)
par(mfrow=c(1,2))
vasc.env.5<-cmdscale(site.env.dist, k = 54, eig = FALSE, add = FALSE, x.ret = FALSE)
x<-vasc.env.5[,1]
y<-vasc.env.5[,2]
plot(vasc.env.5, type="n", xlab="", ylab="", main="Principal Coordinate Analysis (PCoA) with five \nclusters based on K-means clustering on \npairwise environmental distances", cex=1, pch=16, col="black")
ordispider(vasc.env.5, vasc.env.clust.5$cluster, col="grey40", label=TRUE)
ordiellipse(vasc.env.5, vasc.env.clust.5$cluster, col="grey70")
box(lwd=2)
text(x, y, rownames(vasc.env.5), cex=0.4)

vasc.env.5<-cmdscale(site.env.dist, k = 54, eig = FALSE, add = FALSE, x.ret = FALSE)
x<-vasc.env.5[,1]
y<-vasc.env.5[,2]
plot(vasc.env.5, type="n", xlab="", ylab="", main="Principal Coordinate Analysis (PCoA) with five \nclusters based on K-means clustering on \npairwise environmental distances", cex=1, pch=16, col="black")
ordispider(vasc.env.5, vasc.env.clust.5$cluster, col="grey40", label=TRUE)
ordiellipse(vasc.env.5, vasc.env.clust.5$cluster, col="grey70")
box(lwd=2)

#Plot PCoA with 6 groupings based on k-means clustering for environmental distances
#dev.new(width=11.8, height=8)
par(mfrow=c(1,2))
vasc.env.6<-cmdscale(site.env.dist, k = 56, eig = FALSE, add = FALSE, x.ret = FALSE)

## Warning in cmdscale(site.env.dist, k = 56, eig = FALSE, add = FALSE, x.ret
## = FALSE): only 54 of the first 56 eigenvalues are > 0

x<-vasc.env.6[,1]
y<-vasc.env.6[,2]
plot(vasc.env.6, type="n", xlab="", ylab="", main="Principal Coordinate Analysis (PCoA) with six \nclusters based on K-means clustering on \npairwise environmental distances", cex=1, pch=16, col="black")
ordispider(vasc.env.6, vasc.env.clust.6$cluster, col="grey40", label=TRUE)
ordiellipse(vasc.env.6, vasc.env.clust.6$cluster, col="grey70")
box(lwd=2)
text(x, y, rownames(vasc.env.6), cex=0.4)

vasc.env.6<-cmdscale(site.env.dist, k = 56, eig = FALSE, add = FALSE, x.ret = FALSE)

## Warning in cmdscale(site.env.dist, k = 56, eig = FALSE, add = FALSE, x.ret
## = FALSE): only 54 of the first 56 eigenvalues are > 0

x<-vasc.env.6[,1]
y<-vasc.env.6[,2]
plot(vasc.env.6, type="n", xlab="", ylab="", main="Principal Coordinate Analysis (PCoA) with six \nclusters based on K-means clustering on \npairwise environmental distances", cex=1, pch=16, col="black")
ordispider(vasc.env.6, vasc.env.clust.6$cluster, col="grey40", label=TRUE)
ordiellipse(vasc.env.6, vasc.env.clust.6$cluster, col="grey70")
box(lwd=2)

#Plot PCoA with 7 groupings based on k-means clustering for environmental distances
#dev.new(width=11.8, height=8)
par(mfrow=c(1,2))
vasc.env.7<-cmdscale(site.env.dist, k = 56, eig = FALSE, add = FALSE, x.ret = FALSE)

## Warning in cmdscale(site.env.dist, k = 56, eig = FALSE, add = FALSE, x.ret
## = FALSE): only 54 of the first 56 eigenvalues are > 0

x<-vasc.env.7[,1]
y<-vasc.env.7[,2]
plot(vasc.env.7, type="n", xlab="", ylab="", main="Principal Coordinate Analysis (PCoA) with seven \nclusters based on K-means clustering on \npairwise environmental distances", cex=1, pch=16, col="black")
ordispider(vasc.env.7, vasc.env.clust.7$cluster, col="grey40", label=TRUE)
ordiellipse(vasc.env.7, vasc.env.clust.7$cluster, col="grey70")
box(lwd=2)
text(x, y, rownames(vasc.env.7), cex=0.4)

vasc.env.7<-cmdscale(site.env.dist, k = 56, eig = FALSE, add = FALSE, x.ret = FALSE)

## Warning in cmdscale(site.env.dist, k = 56, eig = FALSE, add = FALSE, x.ret
## = FALSE): only 54 of the first 56 eigenvalues are > 0

x<-vasc.env.7[,1]
y<-vasc.env.7[,2]
plot(vasc.env.7, type="n", xlab="", ylab="", main="Principal Coordinate Analysis (PCoA) with seven \nclusters based on K-means clustering on \npairwise environmental distances", cex=1, pch=16, col="black")
ordispider(vasc.env.7, vasc.env.clust.7$cluster, col="grey40", label=TRUE)
ordiellipse(vasc.env.7, vasc.env.clust.7$cluster, col="grey70")
box(lwd=2)

#Plot PCoA with 8 groupings based on k-means clustering for environmental distances
#dev.new(width=11.8, height=8)
par(mfrow=c(1,2))
vasc.env.8<-cmdscale(site.env.dist, k = 56, eig = FALSE, add = FALSE, x.ret = FALSE)

## Warning in cmdscale(site.env.dist, k = 56, eig = FALSE, add = FALSE, x.ret
## = FALSE): only 54 of the first 56 eigenvalues are > 0

x<-vasc.env.8[,1]
y<-vasc.env.8[,2]
plot(vasc.env.8, type="n", xlab="", ylab="", main="Principal Coordinate Analysis (PCoA) with eight \nclusters based on K-means clustering on \npairwise environmental distances", cex=1, pch=16, col="black")
ordispider(vasc.env.8, vasc.env.clust.8$cluster, col="grey40", label=TRUE)
ordiellipse(vasc.env.8, vasc.env.clust.8$cluster, col="grey70")
box(lwd=2)
text(x, y, rownames(vasc.env.8), cex=0.4)

vasc.env.8<-cmdscale(site.env.dist, k = 56, eig = FALSE, add = FALSE, x.ret = FALSE)

## Warning in cmdscale(site.env.dist, k = 56, eig = FALSE, add = FALSE, x.ret
## = FALSE): only 54 of the first 56 eigenvalues are > 0

x<-vasc.env.8[,1]
y<-vasc.env.8[,2]
plot(vasc.env.8, type="n", xlab="", ylab="", main="Principal Coordinate Analysis (PCoA) with eight \nclusters based on K-means clustering on \npairwise environmental distances", cex=1, pch=16, col="black")
ordispider(vasc.env.8, vasc.env.clust.8$cluster, col="grey40", label=TRUE)
ordiellipse(vasc.env.8, vasc.env.clust.8$cluster, col="grey70")
box(lwd=2)

#Plot PCoA with 9 groupings based on k-means clustering for environmental distances
#dev.new(width=11.8, height=8)
par(mfrow=c(1,2))
vasc.env.9<-cmdscale(site.env.dist, k = 56, eig = FALSE, add = FALSE, x.ret = FALSE)

## Warning in cmdscale(site.env.dist, k = 56, eig = FALSE, add = FALSE, x.ret
## = FALSE): only 54 of the first 56 eigenvalues are > 0

x<-vasc.env.9[,1]
y<-vasc.env.9[,2]
plot(vasc.env.9, type="n", xlab="", ylab="", main="Principal Coordinate Analysis (PCoA) with nine \nclusters based on K-means clustering on \npairwise environmental distances", cex=1, pch=16, col="black")
ordispider(vasc.env.9, vasc.env.clust.9$cluster, col="grey40", label=TRUE)
ordiellipse(vasc.env.9, vasc.env.clust.9$cluster, col="grey70")
box(lwd=2)
text(x, y, rownames(vasc.env.9), cex=0.4)

vasc.env.9<-cmdscale(site.env.dist, k = 56, eig = FALSE, add = FALSE, x.ret = FALSE)

## Warning in cmdscale(site.env.dist, k = 56, eig = FALSE, add = FALSE, x.ret
## = FALSE): only 54 of the first 56 eigenvalues are > 0

x<-vasc.env.9[,1]
y<-vasc.env.9[,2]
plot(vasc.env.9, type="n", xlab="", ylab="", main="Principal Coordinate Analysis (PCoA) with nine \nclusters based on K-means clustering on \npairwise environmental distances", cex=1, pch=16, col="black")
ordispider(vasc.env.9, vasc.env.clust.9$cluster, col="grey40", label=TRUE)
ordiellipse(vasc.env.9, vasc.env.clust.9$cluster, col="grey70")
box(lwd=2)

***

Constrained clustering and Ward’s minimum variance

The following information is taken directly from Legendre and Legendre (2012)

Constrained clustering - external information about the sampling design is used by the clustering algorithm, in addition to the distance relationships among objects. Used to put multivariate data (species abundance) into spatially-constrained clusters.

Complete linkage clustering is the fusion of two clusters depending on the most distance pair of objects instead of the closest. Thus, an object joins a cluster only when it is linked to all the objects already members of a cluster. In the same way, two clusters can fuse only when all objects of the first are linked to all objects of the second.

As a cluster grows, it becomes more and more difficult for new objects to join to it because the new objects should bear links with all the objects already in the cluster before being incorporated.

This type of linkage is often desirable in ecology, when one wishes to delineate clusters with clear discontinuities.

Ward’s minimum variance method is related to the centroid methods in that it also leads to a geometric representation in which cluster centroids play a key role.

In the beginning of the procedure, each object is in a cluster of its own, so that the distance of an object to its cluster’s centroid is 0; hence the sum of all these distances is also 0. As clusters form, the centroids move away from actual object coordinates and the sums of the squared distances from the objects to the centroids increase. At each clustering step, Ward’s method finds the pair of objects or clusters whose fusion increases as little as possible the sum, over all groups formed so far, of the squared distances between objects and cluster centroids; that sum is the total within-group sum-of-squares.

#calculate Euclidean distances between different plots based on elevations for constrained clustering
coord.dat<-cbind(site.lat, site.lon)
#the following creates a 0 and 1 links matrix to be input into constrained analysis
site.list <- nb2listw(tri2nb(coord.dat), style="B")

## 
##      PLEASE NOTE:  The components "delsgs" and "summary" of the 
##      object returned by deldir() are now DATA FRAMES rather than 
##      matrices (as they were prior to release 0.0-18). 
##      See help("deldir").
##  
##      PLEASE NOTE: The process that deldir() uses for determining
##      duplicated points has changed from that used in version
##      0.0-9 of this package (and previously). See help("deldir").

links.mat.dat <- listw2mat(site.list)

sp.hel<-decostand(abd.sp.allsp, "hel")
sp.dist<-dist(sp.hel)

# Constrained clustering with distances
#abd.res <- constrained.clust(sp.dist, links.mat.dat)
#save(abd.res, file="abd.res.RData")
load(file="abd.res.RData")

#Cross-validation (long procedure)
"simpleRDA2" <-
function (Y, X, SS.Y, ...)
{
    Q <- qr(X, tol=1e-6)
    Yfit.X <- qr.fitted(Q, Y)
    SS <- sum(Yfit.X^2)
    if (missing(SS.Y)) SS.Y <- sum(Y^2)
    Rsquare <- SS/SS.Y
    list(Rsquare = Rsquare, m = Q$rank)
}

Cross validation of constrained clustering results

#Cross validation of constrained clustering results
#cross.abd <- cross(sp.hel, abd.res, k2=15)
#save(cross.abd, file="cross.abd.RData")
load(file="cross.abd.RData")
summary(cross.abd)

##              Length Class      Mode   
## AIC           84    -none-     numeric
## ngr.AIC        1    -none-     numeric
## classif.AIC  176    factor     numeric
## ngr.P          1    -none-     numeric
## classif.P    176    factor     numeric
## cvre.and.ngr   2    -none-     numeric
## classif.cvre 176    factor     numeric
## out           14    data.frame list

cross.abd$AIC

##          Rsquare      AICc       C-H       P(C-H)      cvre ngr.cross
## Gr.2  0.09886928 -4.251468 19.090742 2.137442e-05 0.9232242         2
## Gr.3  0.15169603 -4.300117 15.468166 6.602442e-07 0.8787299         3
## Gr.4  0.19839544 -4.344840 14.189879 2.634445e-08 0.8393618         4
## Gr.5  0.23656514 -4.381588 13.246919 2.013167e-09 0.8072241         5
## Gr.6  0.26789636 -4.411311 12.441512 2.698637e-10 0.7823718         6
## Gr.7  0.29218610 -4.432725 11.627221 6.950622e-11 0.7663170         7
## Gr.8  0.30802425 -4.442880 10.683296 4.260165e-11 0.7609075         8
## Gr.9  0.32810717 -4.459707 10.193943 1.503853e-11 0.7525160         9
## Gr.10 0.34148320 -4.467037  9.564628 1.083889e-11 0.7409901         8
## Gr.11 0.35412783 -4.473491  9.046851 8.124105e-12 0.7440883         8
## Gr.12 0.36596542 -4.478897  8.605543 6.428493e-12 0.7440609        11
## Gr.13 0.37677268 -4.482834  8.211817 5.542288e-12 0.7457101        12
## Gr.14 0.38743969 -4.486679  7.881827 4.707134e-12 0.7397131        12
## Gr.15 0.39746597 -4.489595  7.586059 4.191434e-12 0.7396604        12

# Cross validation with AICc values suggests 15 clusters

Dendrograms for Constrained clustering using Ward.D2 clustering

#dev.new(width=11.8, height=8)
plot(abd.res, hang=-1, cex=0.6, lwd=2, main="Constrained cluster dendrogram for vascular data based on Ward.D2 clustering", xlab="")

# Choose constrained cluster for vascular plants and make into 5 groups
c5 <- cutree(abd.res, 5)
# Choose constrained cluster for vascular plants and make into 6 groups
c6 <- cutree(abd.res, 6)
# Choose constrained cluster for vascular plants and make into 7 groups
c7 <- cutree(abd.res, 7)
# Choose constrained cluster for vascular plants and make into 8 groups
c8 <- cutree(abd.res, 8)
# Choose constrained cluster for vascular plants and make into 9 groups
c9 <- cutree(abd.res, 9)
# Choose constrained cluster for vascular plants and make into 10 groups
c10 <- cutree(abd.res, 10)
# Choose constrained cluster for vascular plants and make into 15 groups
c15 <- cutree(abd.res, 15)

# PCoA with five clusters based on constrained clustering
#dev.new(width=11.8, height=8)
par(mfrow=c(1,2))
veg.abd<-cmdscale(sp.dist, k = 2, eig = FALSE, add = FALSE, x.ret = FALSE)
x<-veg.abd[,1]
y<-veg.abd[,2]
plot(x,y, type="n", xlab="", ylab="", main="Principal Coordinate Analysis (PCoA) with \nfive clusters based on constrained clustering", cex=1, pch=16, col="black")
ordispider(veg.abd, c5, col="grey40", label=TRUE)
ordiellipse(veg.abd, c5, col="grey70")
text(x, y, rownames(veg.abd), cex=0.4)
box(lwd=2)

veg.abd<-cmdscale(sp.dist, k = 2, eig = FALSE, add = FALSE, x.ret = FALSE)
x<-veg.abd[,1]
y<-veg.abd[,2]
plot(x,y, type="n", xlab="", ylab="", main="Principal Coordinate Analysis (PCoA) with \nfive clusters based on constrained clustering", cex=1, pch=16, col="black")
ordispider(veg.abd, c5, col="grey40", label=TRUE)
ordiellipse(veg.abd, c5, col="grey70")
box(lwd=2)

#text(x, y, rownames(veg.abd), cex=0.4)

# PCoA with six clusters based on constrained clustering
#dev.new(width=11.8, height=8)
par(mfrow=c(1,2))
veg.abd<-cmdscale(sp.dist, k = 2, eig = FALSE, add = FALSE, x.ret = FALSE)
x<-veg.abd[,1]
y<-veg.abd[,2]
plot(x,y, type="n", xlab="", ylab="", main="Principal Coordinate Analysis (PCoA) with \nsix clusters based on constrained clustering", cex=1, pch=16, col="black")
ordispider(veg.abd, c6, col="grey40", label=TRUE)
ordiellipse(veg.abd, c6, col="grey70")
text(x, y, rownames(veg.abd), cex=0.4)
box(lwd=2)

veg.abd<-cmdscale(sp.dist, k = 2, eig = FALSE, add = FALSE, x.ret = FALSE)
x<-veg.abd[,1]
y<-veg.abd[,2]
plot(x,y, type="n", xlab="", ylab="", main="Principal Coordinate Analysis (PCoA) with \nsix clusters based on constrained clustering", cex=1, pch=16, col="black")
ordispider(veg.abd, c6, col="grey40", label=TRUE)
ordiellipse(veg.abd, c6, col="grey70")
box(lwd=2)

#text(x, y, rownames(veg.abd), cex=0.4)


#Plot PCoA with 7 groupings based on constrained clustering
#dev.new(width=11.8, height=8)
par(mfrow=c(1,2))
veg.abd<-cmdscale(sp.dist, k = 2, eig = FALSE, add = FALSE, x.ret = FALSE)
x<-veg.abd[,1]
y<-veg.abd[,2]
plot(x,y, type="n", xlab="", ylab="", main="Principal Coordinate Analysis (PCoA) with \nseven clusters based on constrained clustering", cex=1, pch=16, col="black")
ordispider(veg.abd, c7, col="grey40", label=TRUE)
ordiellipse(veg.abd, c7, col="grey70")
text(x, y, rownames(veg.abd), cex=0.4)
box(lwd=2)

veg.abd<-cmdscale(sp.dist, k = 2, eig = FALSE, add = FALSE, x.ret = FALSE)
x<-veg.abd[,1]
y<-veg.abd[,2]
plot(x,y, type="n", xlab="", ylab="", main="Principal Coordinate Analysis (PCoA) with \nseven clusters based on constrained clustering", cex=1, pch=16, col="black")
ordispider(veg.abd, c7, col="grey40", label=TRUE)
ordiellipse(veg.abd, c7, col="grey70")
box(lwd=2)

#text(x, y, rownames(veg.abd), cex=0.4)

#Plot PCoA with 8 groupings based on constrained clustering
#dev.new(width=11.8, height=8)
par(mfrow=c(1,2))
veg.abd<-cmdscale(sp.dist, k = 2, eig = FALSE, add = FALSE, x.ret = FALSE)
x<-veg.abd[,1]
y<-veg.abd[,2]
plot(x,y, type="n", xlab="", ylab="", main="Principal Coordinate Analysis (PCoA) with \neight clusters based on constrained clustering", cex=1, pch=16, col="black")
ordispider(veg.abd, c8, col="grey40", label=TRUE)
ordiellipse(veg.abd, c8, col="grey70")
box(lwd=2)
text(x, y, rownames(veg.abd), cex=0.4)

#Plot PCoA with 8 groupings based on constrained clustering
veg.abd<-cmdscale(sp.dist, k = 2, eig = FALSE, add = FALSE, x.ret = FALSE)
x<-veg.abd[,1]
y<-veg.abd[,2]
plot(x,y, type="n", xlab="", ylab="", main="Principal Coordinate Analysis (PCoA) with \neight clusters based on constrained clustering", cex=1, pch=16, col="black")
ordispider(veg.abd, c8, col="grey40", label=TRUE)
ordiellipse(veg.abd, c8, col="grey70")
box(lwd=2)

#text(x, y, rownames(veg.abd), cex=0.4)

#Plot PCoA with 9 groupings based on constrained clustering
#dev.new(width=11.8, height=8)
par(mfrow=c(1,2))
veg.abd<-cmdscale(sp.dist, k = 2, eig = FALSE, add = FALSE, x.ret = FALSE)
x<-veg.abd[,1]
y<-veg.abd[,2]
plot(x,y, type="n", xlab="", ylab="", main="Principal Coordinate Analysis (PCoA) with \nnine clusters based on constrained clustering", cex=1, pch=16, col="black")
ordispider(veg.abd, c9, col="grey40", label=TRUE)
ordiellipse(veg.abd, c9, col="grey70")
box(lwd=2)
text(x, y, rownames(veg.abd), cex=0.4)

#Plot PCoA with 9 groupings based on constrained clustering
veg.abd<-cmdscale(sp.dist, k = 2, eig = FALSE, add = FALSE, x.ret = FALSE)
x<-veg.abd[,1]
y<-veg.abd[,2]
plot(x,y, type="n", xlab="", ylab="", main="Principal Coordinate Analysis (PCoA) with \nnine clusters based on constrained clustering", cex=1, pch=16, col="black")
ordispider(veg.abd, c9, col="grey40", label=TRUE)
ordiellipse(veg.abd, c9, col="grey70")
box(lwd=2)

#text(x, y, rownames(veg.abd), cex=0.4)

#Plot PCoA with 10 groupings based on constrained clustering
#dev.new(width=11.8, height=8)
par(mfrow=c(1,2))
veg.abd<-cmdscale(sp.dist, k = 2, eig = FALSE, add = FALSE, x.ret = FALSE)
x<-veg.abd[,1]
y<-veg.abd[,2]
plot(x,y, type="n", xlab="", ylab="", main="Principal Coordinate Analysis (PCoA) with \nten clusters based on constrained clustering", cex=1, pch=16, col="black")
ordispider(veg.abd, c10, col="grey40", label=TRUE)
ordiellipse(veg.abd, c10, col="grey70")
box(lwd=2)
text(x, y, rownames(veg.abd), cex=0.4)

#Plot PCoA with 10 groupings based on constrained clustering
veg.abd<-cmdscale(sp.dist, k = 2, eig = FALSE, add = FALSE, x.ret = FALSE)
x<-veg.abd[,1]
y<-veg.abd[,2]
plot(x,y, type="n", xlab="", ylab="", main="Principal Coordinate Analysis (PCoA) with \nten clusters based on constrained clustering", cex=1, pch=16, col="black")
ordispider(veg.abd, c10, col="grey40", label=TRUE)
ordiellipse(veg.abd, c10, col="grey70")
box(lwd=2)

#text(x, y, rownames(veg.abd), cex=0.4)

#Plot PCoA with 15 groupings based on constrained clustering
#dev.new(width=11.8, height=8)
par(mfrow=c(1,2))
veg.abd<-cmdscale(sp.dist, k = 2, eig = FALSE, add = FALSE, x.ret = FALSE)
x<-veg.abd[,1]
y<-veg.abd[,2]
plot(x,y, type="n", xlab="", ylab="", main="Principal Coordinate Analysis (PCoA) with \nfifteen clusters based on constrained clustering", cex=1, pch=16, col="black")
ordispider(veg.abd, c15, col="grey40", label=TRUE)
ordiellipse(veg.abd, c15, col="grey70")
box(lwd=2)
text(x, y, rownames(veg.abd), cex=0.4)

#Plot PCoA with 15 groupings based on constrained clustering
veg.abd<-cmdscale(sp.dist, k = 2, eig = FALSE, add = FALSE, x.ret = FALSE)
x<-veg.abd[,1]
y<-veg.abd[,2]
plot(x,y, type="n", xlab="", ylab="", main="Principal Coordinate Analysis (PCoA) with \nfifteen clusters based on constrained clustering", cex=1, pch=16, col="black")
ordispider(veg.abd, c15, col="grey40", label=TRUE)
ordiellipse(veg.abd, c15, col="grey70")
box(lwd=2)

#text(x, y, rownames(veg.abd), cex=0.4)

K-means clustering using Hellinger transformed abundance data for all vascular plants

# Results suggest that 3 clusters in optimal; therefore let's also try 3 groups for PCoA
ccas<-cascadeKM(decostand(abd.sp.allsp, "hell"),2,15)
plot(ccas,sortq=TRUE)

# Choose constrained cluster for vascular plants and make into 3 groups

#K-means clustering of vascular abundance data based on K-means clustering and Hellinger transformed data
veg.abd.k.clust<-kmeans(decostand(abd.sp.allsp, "hell"),3)

#Plot PCoA with 3 groupings based on k-means clustering
#dev.new(width=11.8, height=8)
par(mfrow=c(1,2))
veg.abd<-cmdscale(sp.dist, k = 2, eig = FALSE, add = FALSE, x.ret = FALSE)
plot(veg.abd, type="n", xlab="", ylab="", main="Principal Coordinate Analysis (PCoA) \nthree clusters based on K-means clustering using abundance data", cex=1, pch=16, col="black")
ordispider(veg.abd, veg.abd.k.clust$cluster, col="grey40", label=TRUE)
ordiellipse(veg.abd, veg.abd.k.clust$cluster, col="grey70")
box(lwd=2)
text(x, y, rownames(veg.abd), cex=0.4)

veg.abd<-cmdscale(sp.dist, k = 2, eig = FALSE, add = FALSE, x.ret = FALSE)
plot(veg.abd, type="n", xlab="", ylab="", main="Principal Coordinate Analysis (PCoA) with \nthree clusters based on K-means clustering using abundance data", cex=1, pch=16, col="black")
ordispider(veg.abd, veg.abd.k.clust$cluster, col="grey40", label=TRUE)
ordiellipse(veg.abd, veg.abd.k.clust$cluster, col="grey70")
box(lwd=2)

Clustering of Sorensen’s beta diversity and phylogenetic beta diversity distances for both vascular plants and angiosperms only

Calculate Sorensen’s beta diversity and phylogenetic beta diversity

# Sorensen's beta diversity for all vasculars
sor.beta<-betadiver(abd.sp.allsp, "sor")
sor.beta.eu<-dist(sor.beta, method = "euclidean", diag = FALSE, upper = FALSE)

# Sorensen's beta diversity for only angiosperms
sor.beta.angio<-betadiver(angio.sp.abd, "sor")
sor.beta.angio.eu<-dist(sor.beta.angio, method = "euclidean", diag = FALSE, upper = FALSE)

# Phylsor calculation for all vasculars
beta.ps<-phylosor(abd.sp.allsp, trans.one.tree)
beta.ps.eu<-sqrt(dist(beta.ps))

# Phylsor calculation for angiosperms
angio.beta.ps<-phylosor(angio.sp.abd, trans.one.tree)
angio.beta.ps.eu<-dist(angio.beta.ps, method = "euclidean", diag = FALSE, upper = FALSE)


#spatial constrained clustering using Ward's D2 clustering for vascular beta diversity
#sor.beta.constr.clust.ward.D2<-constrained.clust(sor.beta, links.mat=links.mat.dat, method="ward.D2", beta=-0.25, verbose=FALSE, target=1)
#save(sor.beta.constr.clust.ward.D2, file="sor.beta.constr.clust.ward.D2.RData")
load(file="sor.beta.constr.clust.ward.D2.RData")

#constrained clustering with elevation using Ward's D2 clustering for angiosperm beta diversity
#angio.sor.beta.constr.clust.ward.D2<-constrained.clust(sor.beta.angio, links.mat=links.mat.dat, method="ward.D2", beta=-0.25, verbose=FALSE, target=1)
#save(angio.sor.beta.constr.clust.ward.D2, file="angio.sor.beta.constr.clust.ward.D2.RData")
load(file="angio.sor.beta.constr.clust.ward.D2.RData")

#constrained clustering with elevation using Ward's D2 clustering for vascular phylogenetic beta diversity
#phylosor.beta.constr.clust.ward.D2<-constrained.clust(beta.ps, links.mat=links.mat.dat, method="ward.D2", beta=-0.25, verbose=FALSE, target=1)
#save(phylosor.beta.constr.clust.ward.D2, file="phylosor.beta.constr.clust.ward.D2.RData")
load(file="phylosor.beta.constr.clust.ward.D2.RData")

#constrained clustering with elevation using Ward's D2 clustering for vascular phylogenetic beta diversity
#phylosor.angio.beta.constr.clust.ward.D2<-constrained.clust(angio.beta.ps, links.mat=links.mat.dat, method="ward.D2", beta=-0.25, verbose=FALSE, target=1)
#save(phylosor.angio.beta.constr.clust.ward.D2, file="phylosor.angio.beta.constr.clust.ward.D2.RData")
load(file="phylosor.angio.beta.constr.clust.ward.D2.RData")

Dendrograms of beta diversity and phylogenetic beta diversity distances among plots

#dev.new(width=11.8, height=8)
plot(sor.beta.constr.clust.ward.D2, cex=0.6, lwd=2, main="Constrained cluster dendrogram for vascular beta diversity based on Ward.D2 clustering", xlab="")

#dev.new(width=11.8, height=8)
plot(angio.sor.beta.constr.clust.ward.D2, cex=0.6, lwd=2, main="Constrained cluster dendrogram for angiosperm vascular beta diversity based on Ward.D2 clustering", xlab="")

#dev.new(width=11.8, height=8)
plot(phylosor.beta.constr.clust.ward.D2, cex=0.6, lwd=2, main="Constrained cluster dendrogram for vascular phylogenetic beta diversity data based on Ward.D2 clustering", xlab="")

#dev.new(width=11.8, height=8)
plot(phylosor.angio.beta.constr.clust.ward.D2, cex=0.6, lwd=2, main="Constrained cluster dendrogram for angiosperm phylogenetic beta diversity data based on Ward.D2 clustering", xlab="")

Cross validation of constrained clustering results

I only did cross valication for Sorensen’s beta diversity for vasculars, since constrained clustering results (see below) did not seem very promising.

#cross.abd.pd <- cross(sor.beta,sor.beta.constr.clust.ward.D2, k2=15)
#save(cross.abd, file="cross.abd.RData")
load(file="cross.abd.RData")
summary(cross.abd)

##              Length Class      Mode   
## AIC           84    -none-     numeric
## ngr.AIC        1    -none-     numeric
## classif.AIC  176    factor     numeric
## ngr.P          1    -none-     numeric
## classif.P    176    factor     numeric
## cvre.and.ngr   2    -none-     numeric
## classif.cvre 176    factor     numeric
## out           14    data.frame list

cross.abd$AIC

##          Rsquare      AICc       C-H       P(C-H)      cvre ngr.cross
## Gr.2  0.09886928 -4.251468 19.090742 2.137442e-05 0.9232242         2
## Gr.3  0.15169603 -4.300117 15.468166 6.602442e-07 0.8787299         3
## Gr.4  0.19839544 -4.344840 14.189879 2.634445e-08 0.8393618         4
## Gr.5  0.23656514 -4.381588 13.246919 2.013167e-09 0.8072241         5
## Gr.6  0.26789636 -4.411311 12.441512 2.698637e-10 0.7823718         6
## Gr.7  0.29218610 -4.432725 11.627221 6.950622e-11 0.7663170         7
## Gr.8  0.30802425 -4.442880 10.683296 4.260165e-11 0.7609075         8
## Gr.9  0.32810717 -4.459707 10.193943 1.503853e-11 0.7525160         9
## Gr.10 0.34148320 -4.467037  9.564628 1.083889e-11 0.7409901         8
## Gr.11 0.35412783 -4.473491  9.046851 8.124105e-12 0.7440883         8
## Gr.12 0.36596542 -4.478897  8.605543 6.428493e-12 0.7440609        11
## Gr.13 0.37677268 -4.482834  8.211817 5.542288e-12 0.7457101        12
## Gr.14 0.38743969 -4.486679  7.881827 4.707134e-12 0.7397131        12
## Gr.15 0.39746597 -4.489595  7.586059 4.191434e-12 0.7396604        12

PCoA ordination of constrained clusters for vascular phylogenetic beta diversity

# Choose constrained cluster for vascular plants and make into 5 groups for phylogenetic beta diversity
c5.beta.ps <- cutree(phylosor.beta.constr.clust.ward.D2, 5)

#dev.new(width=11.8, height=8)
par(mfrow=c(1,2))
vasc.beta.ps<-cmdscale(beta.ps.eu, k = 2, eig = FALSE, add = FALSE, x.ret = FALSE)
x<-vasc.beta.ps[,1]
y<-vasc.beta.ps[,2]
plot(x,y, type="n", xlab="", ylab="", main="Principal Coordinate Analysis (PCoA) with \nfive clusters based on constrained clustering \non phylogenetic beta diversity", cex=1, pch=16, col="black")
ordispider(vasc.beta.ps, c5.beta.ps, col="grey40", label=TRUE)
ordiellipse(vasc.beta.ps, c5.beta.ps, col="grey70")
text(x, y, rownames(vasc.beta.ps), cex=0.4)
box(lwd=2)

vasc.beta.ps<-cmdscale(beta.ps.eu, k = 2, eig = FALSE, add = FALSE, x.ret = FALSE)
x<-vasc.beta.ps[,1]
y<-vasc.beta.ps[,2]
plot(x,y, type="n", xlab="", ylab="", main="Principal Coordinate Analysis (PCoA) with \nfive clusters based on constrained clustering \non phylogenetic beta diversity", cex=1, pch=16, col="black")
ordispider(vasc.beta.ps, c5.beta.ps, col="grey40", label=TRUE)
ordiellipse(vasc.beta.ps, c5.beta.ps, col="grey70")
box(lwd=2)

K-means for Sorensen’s phylogenetic distances among plots including all vasculars

Since constrained clusters for phylogenetic distances does not look sensible, try K-means clustering instead.

Note that dendrograms are not possible for K-means clustering.

# Results suggest that 2 or 4 clusters in optimal for phylogenetic beta diversity
ccas.ps<-cascadeKM((beta.ps.eu),2,15)
plot(ccas.ps,sortq=TRUE)

vasc.ps.clust.two<-kmeans(beta.ps.eu,2)

vasc.ps.clust.four<-kmeans(beta.ps.eu,4)

#Plot PCoA with 2 groupings based on k-means clustering for Phylogenetic beta diversity values
#dev.new(width=11.8, height=8)
par(mfrow=c(1,2))
veg.abd.k.2<-cmdscale(beta.ps.eu, k = 2, eig = FALSE, add = FALSE, x.ret = FALSE)
plot(veg.abd.k.2, type="n", xlab="", ylab="", main="Principal Coordinate Analysis (PCoA) with two \nclusters based on K-means clustering on \nphylogenetic beta diversity pairwise distances", cex=1, pch=16, col="black")
ordispider(veg.abd.k.2, vasc.ps.clust.two$cluster, col="grey40", label=TRUE)
ordiellipse(veg.abd.k.2, vasc.ps.clust.two$cluster, col="grey70")
box(lwd=2)
text(x, y, rownames(veg.abd), cex=0.4)

veg.abd.k.2<-cmdscale(beta.ps.eu, k = 2, eig = FALSE, add = FALSE, x.ret = FALSE)
plot(veg.abd.k.2, type="n", xlab="", ylab="", main="Principal Coordinate Analysis (PCoA) with two \nclusters based on K-means clustering on \nphylogenetic beta diversity pairwise distances", cex=1, pch=16, col="black")
ordispider(veg.abd.k.2, vasc.ps.clust.two$cluster, col="grey40", label=TRUE)
ordiellipse(veg.abd.k.2, vasc.ps.clust.two$cluster, col="grey70")
box(lwd=2)

#Plot PCoA with 4 groupings based on k-means clustering for Phylogenetic beta diversity values
#dev.new(width=11.8, height=8)
par(mfrow=c(1,2))
veg.abd.k.4<-cmdscale(beta.ps.eu, k = 2, eig = FALSE, add = FALSE, x.ret = FALSE)
plot(veg.abd.k.4, type="n", xlab="", ylab="", main="Principal Coordinate Analysis (PCoA) with four \nclusters based on K-means clustering on \nphylogenetic beta diversity pairwise distances", cex=1, pch=16, col="black")
ordispider(veg.abd.k.4, vasc.ps.clust.four$cluster, col="grey40", label=TRUE)
ordiellipse(veg.abd.k.4, vasc.ps.clust.four$cluster, col="grey70")
box(lwd=2)
text(x, y, rownames(veg.abd), cex=0.4)

veg.abd.k.4<-cmdscale(beta.ps.eu, k = 2, eig = FALSE, add = FALSE, x.ret = FALSE)
plot(veg.abd.k.4, type="n", xlab="", ylab="", main="Principal Coordinate Analysis (PCoA) with four \nclusters based on K-means clustering on \nphylogenetic beta diversity pairwise distances", cex=1, pch=16, col="black")
ordispider(veg.abd.k.4, vasc.ps.clust.four$cluster, col="grey40", label=TRUE)
ordiellipse(veg.abd.k.4, vasc.ps.clust.four$cluster, col="grey70")
box(lwd=2)