This html document will help you work through a practical example ordination techniques. There are several parts that require you to figure out your own solution, which we will work through in class. We will be using a couple of different datasets, all of which are on Canvas. - let’s start by reading in our first dataset, going back to the morphology of herbivoirous reef fishes
herbivore.morphology <- read.csv(file = "data/herbivore_morphology.csv")
#feel free to take a look at the data
head(herbivore.morphology)
## family genus species gen.spe bodydepth
## 1 Acanthuridae Acanthurus achilles Acanthurus.achilles 0.098504108
## 2 Acanthuridae Acanthurus albipectoralis Acanthurus.albipectoralis 0.001397261
## 3 Acanthuridae Acanthurus auranticavus Acanthurus.auranticavus 0.034632323
## 4 Acanthuridae Acanthurus blochii Acanthurus.blochii 0.075265132
## 5 Acanthuridae Acanthurus dussumieri Acanthurus.dussumieri 0.100102812
## 6 Acanthuridae Acanthurus fowleri Acanthurus.fowleri 0.045968691
## snoutlength eyediameter headlength peduncle
## 1 0.4877797 0.069172106 0.2315766 -0.03889361
## 2 0.4402623 0.000622937 0.2405350 -0.06240037
## 3 0.5386490 -0.008570787 0.2263199 -0.03512062
## 4 0.4782217 -0.004914155 0.2538896 -0.02705601
## 5 0.5661867 0.015159797 0.2443877 -0.04640333
## 6 0.5950563 -0.005349450 0.2244994 -0.06197221
# take a look at the means of the columns
colMeans(herbivore.morphology[5:9])
## bodydepth snoutlength eyediameter headlength peduncle
## 0.003329304 0.428684939 0.001989914 0.267733093 -0.003910865
# let's see if we can code this out the hard way first for two of our variables
herb.practice <- herbivore.morphology %>% #using our pipes
select(bodydepth, snoutlength) %>% # select two columns
mutate(bd.zscore = (bodydepth - mean(bodydepth))/sd(bodydepth), # let's apply the formula using mutate
sl.zscore = (snoutlength - mean(snoutlength))/sd(snoutlength))
head(herb.practice) # look at the data - clearly different
## bodydepth snoutlength bd.zscore sl.zscore
## 1 0.098504108 0.4877797 1.47082227 0.5194079
## 2 0.001397261 0.4402623 -0.02985761 0.1017584
## 3 0.034632323 0.5386490 0.48375384 0.9665194
## 4 0.075265132 0.4782217 1.11168937 0.4353988
## 5 0.100102812 0.5661867 1.49552849 1.2085590
## 6 0.045968691 0.5950563 0.65894499 1.4623064
practice.means <- herb.practice %>%
summarize(mean.zbd = mean(bd.zscore),
mean.zsl = mean(sl.zscore),
sd.zbd = sd(bd.zscore),
sd.zsl = sd(sl.zscore))
practice.means
## mean.zbd mean.zsl sd.zbd sd.zsl
## 1 -2.520945e-17 2.095355e-16 1 1
scaled.bd <- scale(herbivore.morphology$bodydepth)
# you can check that it did the job
mean(scaled.bd)
## [1] -2.405646e-17
sd(scaled.bd)
## [1] 1
# one wrinkle is that scale() turns our output into a matrix. Be aware! It can be turned into a vector using, you may guess, as.vector()
herb.scaled <- herbivore.morphology %>%
mutate_each_(list(~scale(.) %>% as.vector), vars=c(5:9))
## Warning: `mutate_each_()` was deprecated in dplyr 0.7.0.
## Please use `across()` instead.
# I won't unpack this code entirely, but it's a helpful way of performing operations over multiple columns
herb.scaled.long <- herb.scaled %>%
gather(5:9, key = "trait", value = "value")
herb.scaled.summary <- herb.scaled.long %>%
group_by(trait) %>%
summarize(mean = mean(value),
sd = sd(value))
trait.means <- ggplot(herb.scaled.summary, aes(x = trait, y = mean, color = trait)) +
geom_pointrange(aes(ymin = mean-sd, ymax = mean+sd)) +
geom_jitter(data = herb.scaled.long, aes(x = trait, y = value, color = trait), alpha = 0.2) +
theme_bw() +
scale_color_fish_d(option = "Acanthurus_sohal")
trait.means
- as you can see, we have successfully standardized our data, while maintaining relative differences in the values for our species
head(herbivore.morphology)
## family genus species gen.spe bodydepth
## 1 Acanthuridae Acanthurus achilles Acanthurus.achilles 0.098504108
## 2 Acanthuridae Acanthurus albipectoralis Acanthurus.albipectoralis 0.001397261
## 3 Acanthuridae Acanthurus auranticavus Acanthurus.auranticavus 0.034632323
## 4 Acanthuridae Acanthurus blochii Acanthurus.blochii 0.075265132
## 5 Acanthuridae Acanthurus dussumieri Acanthurus.dussumieri 0.100102812
## 6 Acanthuridae Acanthurus fowleri Acanthurus.fowleri 0.045968691
## snoutlength eyediameter headlength peduncle
## 1 0.4877797 0.069172106 0.2315766 -0.03889361
## 2 0.4402623 0.000622937 0.2405350 -0.06240037
## 3 0.5386490 -0.008570787 0.2263199 -0.03512062
## 4 0.4782217 -0.004914155 0.2538896 -0.02705601
## 5 0.5661867 0.015159797 0.2443877 -0.04640333
## 6 0.5950563 -0.005349450 0.2244994 -0.06197221
str(herbivore.morphology)
## 'data.frame': 93 obs. of 9 variables:
## $ family : chr "Acanthuridae" "Acanthuridae" "Acanthuridae" "Acanthuridae" ...
## $ genus : chr "Acanthurus" "Acanthurus" "Acanthurus" "Acanthurus" ...
## $ species : chr "achilles" "albipectoralis" "auranticavus" "blochii" ...
## $ gen.spe : chr "Acanthurus.achilles" "Acanthurus.albipectoralis" "Acanthurus.auranticavus" "Acanthurus.blochii" ...
## $ bodydepth : num 0.0985 0.0014 0.0346 0.0753 0.1001 ...
## $ snoutlength: num 0.488 0.44 0.539 0.478 0.566 ...
## $ eyediameter: num 0.069172 0.000623 -0.008571 -0.004914 0.01516 ...
## $ headlength : num 0.232 0.241 0.226 0.254 0.244 ...
## $ peduncle : num -0.0389 -0.0624 -0.0351 -0.0271 -0.0464 ...
herbivore.meta <- herbivore.morphology[1:4]
herbivore.data <- herbivore.morphology[5:9]
rownames(herbivore.data) <- herbivore.meta$gen.spe
head(herbivore.data)
## bodydepth snoutlength eyediameter headlength
## Acanthurus.achilles 0.098504108 0.4877797 0.069172106 0.2315766
## Acanthurus.albipectoralis 0.001397261 0.4402623 0.000622937 0.2405350
## Acanthurus.auranticavus 0.034632323 0.5386490 -0.008570787 0.2263199
## Acanthurus.blochii 0.075265132 0.4782217 -0.004914155 0.2538896
## Acanthurus.dussumieri 0.100102812 0.5661867 0.015159797 0.2443877
## Acanthurus.fowleri 0.045968691 0.5950563 -0.005349450 0.2244994
## peduncle
## Acanthurus.achilles -0.03889361
## Acanthurus.albipectoralis -0.06240037
## Acanthurus.auranticavus -0.03512062
## Acanthurus.blochii -0.02705601
## Acanthurus.dussumieri -0.04640333
## Acanthurus.fowleri -0.06197221
# for this example, we'll work with the rda() function, which is native to vegan
# ironically, an RDA is actually a constrained ordination but by leaving it unconstrained (i.e. not adding any predictor variables), we are simply running a PCA - it's confusing I know
trait.pca <- rda(herbivore.data, scale = TRUE) # note the scale = TRUE part -- this makes sure our data are normalized as described above
biplot(trait.pca) # make a very ugly plot
- and that’s it, we just ran a PCA successfully. Woot woot!
trait.pca #start by calling the object
## Call: rda(X = herbivore.data, scale = TRUE)
##
## Inertia Rank
## Total 5
## Unconstrained 5 5
## Inertia is correlations
##
## Eigenvalues for unconstrained axes:
## PC1 PC2 PC3 PC4 PC5
## 2.8475 1.1003 0.6356 0.2725 0.1441
trait.pca.unscaled <- rda(herbivore.data, scale = F)
summary(trait.pca) # using summary, we get all the essential info for the PCA
##
## Call:
## rda(X = herbivore.data, scale = TRUE)
##
## Partitioning of correlations:
## Inertia Proportion
## Total 5 1
## Unconstrained 5 1
##
## Eigenvalues, and their contribution to the correlations
##
## Importance of components:
## PC1 PC2 PC3 PC4 PC5
## Eigenvalue 2.8475 1.1003 0.6356 0.2725 0.14407
## Proportion Explained 0.5695 0.2201 0.1271 0.0545 0.02881
## Cumulative Proportion 0.5695 0.7896 0.9167 0.9712 1.00000
##
## Scaling 2 for species and site scores
## * Species are scaled proportional to eigenvalues
## * Sites are unscaled: weighted dispersion equal on all dimensions
## * General scaling constant of scores: 4.631157
##
##
## Species scores
##
## PC1 PC2 PC3 PC4 PC5
## bodydepth -1.050 -1.5373 -0.8116 0.3642 0.1827
## snoutlength -1.452 -1.0370 0.9612 -0.2152 -0.3697
## eyediameter -1.647 0.8518 -0.7641 0.1693 -0.4881
## headlength 1.688 -0.6794 -0.7037 -0.6245 -0.3083
## peduncle 1.856 -0.3067 0.2545 0.7558 -0.3387
##
##
## Site scores (weighted sums of species scores)
##
## PC1 PC2 PC3 PC4 PC5
## Acanthurus.achilles -0.549256 -0.196847 -0.41673 0.826915 -0.26084
## Acanthurus.albipectoralis -0.192537 0.122071 0.17912 -0.036085 0.62505
## Acanthurus.auranticavus -0.324185 -0.232743 0.50241 0.312512 0.43580
## Acanthurus.blochii -0.213814 -0.415571 -0.06857 0.287514 0.48191
## Acanthurus.dussumieri -0.455527 -0.591079 0.02430 0.327667 0.05007
## Acanthurus.fowleri -0.454255 -0.362957 0.59424 0.135611 0.31241
## Acanthurus.guttatus -0.096086 -1.258641 -1.51551 -0.087712 0.59606
## Acanthurus.leucocheilus -0.784262 0.087402 0.09582 0.976815 -0.68697
## Acanthurus.leucopareius -0.390412 -0.642024 -0.32687 0.488518 0.74926
## Acanthurus.lineatus -0.100012 -0.213455 0.34984 0.426566 0.81645
## Acanthurus.maculiceps -0.383387 -0.409522 0.24407 0.061145 -0.45157
## Acanthurus.mata -0.401230 0.234969 0.28977 0.172868 0.22837
## Acanthurus.nigricans -0.386528 -0.345298 -0.30188 0.362352 0.03945
## Acanthurus.nigricauda -0.465632 -0.032885 0.53010 0.485782 -0.30592
## Acanthurus.nigrofuscus -0.091720 0.173699 -0.08377 0.391398 0.18417
## Acanthurus.nigroris -0.347660 0.004722 0.42290 0.593773 0.33824
## Acanthurus.olivaceus -0.322469 0.065212 0.45269 0.360310 0.12960
## Acanthurus.pyroferus -0.310583 -0.150954 0.08954 0.587208 -0.17343
## Acanthurus.thompsoni -0.347337 0.598348 0.16656 0.898583 0.03072
## Acanthurus.triostegus -0.120328 -0.178206 -0.24733 -0.108616 0.30333
## Acanthurus.xanthopterus -0.322311 -0.474877 -0.02990 -0.075403 0.35733
## Bolbometopon.muricatum 0.220941 0.015465 -0.81513 0.241744 -0.40580
## Calotomus.spinidens 0.736584 0.508882 0.41982 0.145244 0.12395
## Cetoscarus.ocellatus 0.515129 -0.477080 -0.29621 -0.796305 -0.98072
## Chlorurus.bleekeri 0.619642 0.091061 0.33170 0.118199 -0.38685
## Chlorurus.bowersi 0.678347 -0.280599 0.26759 -0.235371 0.27302
## Chlorurus.frontalis 0.393237 -0.110726 -0.21260 0.030249 -0.29273
## Chlorurus.japanensis 0.599980 -0.047435 0.32979 0.165712 0.03893
## Chlorurus.microrhinos 0.204752 -0.105734 -0.33058 0.690883 -0.11013
## Chlorurus.spilurus 0.717304 -0.040805 0.35929 -0.089668 -0.53735
## Ctenochaetus.binotatus -0.071096 -0.185757 -0.32754 0.106305 -0.64630
## Ctenochaetus.flavicauda -0.244049 -0.415097 -0.28747 0.500793 -0.24415
## Ctenochaetus.hawaiiensis -0.477079 -0.981251 0.52867 0.244165 -0.04086
## Ctenochaetus.marginatus -0.475728 -0.347915 0.16410 0.723585 0.37866
## Ctenochaetus.striatus -0.125231 -0.524514 0.38987 -0.006667 -0.02281
## Ctenochaetus.strigosus -0.110491 -0.629268 0.20461 0.126077 0.49351
## Ctenochaetus.tominiensis -0.004192 -0.335207 0.67845 0.134942 0.36380
## Hipposcarus.longiceps 0.295972 -0.326860 -0.60504 -0.947791 -0.24853
## Kyphosus.vaigiensis 0.543936 -0.241914 -1.55915 -0.972814 1.33836
## Leptoscarus.vaigiensis 0.733534 0.419722 0.26562 0.060847 -0.26997
## Naso.annulatus -0.671303 0.636315 0.40144 -0.031478 -0.40852
## Naso.brachycentron -0.740326 -0.012775 -0.25981 -1.087854 -0.54543
## Naso.brevirostris -0.654983 0.459308 0.38794 -0.338188 -0.09561
## Naso.hexacanthus -0.582123 0.672470 0.78234 -0.567505 -0.06626
## Naso.lituratus -0.338452 -0.205942 0.51040 -0.938095 -0.04949
## Naso.lopezi -0.642887 0.752491 0.32265 -0.509322 -0.53358
## Naso.thynnoides -0.277299 0.903328 0.67391 -0.819409 0.57055
## Naso.tuberosus -0.600209 0.343675 0.01078 -0.889882 -0.59383
## Naso.unicornis -0.727336 -0.259883 0.46713 -0.712284 0.14285
## Naso.vlamingii -0.784968 0.476269 0.41511 -0.481296 -0.56490
## Paracanthurus.hepatus -0.170708 -0.260372 0.40963 -0.304985 0.15924
## Scarus.altipinnis 0.362909 -0.433734 0.27530 0.096693 -0.55635
## Scarus.chameleon 0.735874 0.023917 0.25569 -0.053399 0.39927
## Scarus.dimidiatus 0.798730 0.076239 0.49850 0.149822 -0.06216
## Scarus.festivus 0.635275 0.044563 -0.18677 0.189500 0.02563
## Scarus.flavipectoralis 0.761533 0.219163 0.44764 -0.033140 0.53360
## Scarus.forsteni 0.557740 0.112582 0.14137 0.149350 0.06873
## Scarus.frenatus 0.710884 0.031198 0.23501 0.278360 -0.28597
## Scarus.ghobban 0.320318 -0.255042 -0.44972 -0.407212 -0.31276
## Scarus.globiceps 0.670007 -0.006700 0.22356 0.250030 -0.05515
## Scarus.hypselopterus 0.765772 0.352374 0.47912 0.265259 0.34841
## Scarus.longipinnis 0.382589 0.374147 0.24181 0.696125 -0.16082
## Scarus.niger 0.575313 -0.138538 -0.12514 0.287207 -0.19853
## Scarus.oviceps 0.787324 -0.010247 0.37864 -0.043355 -0.53162
## Scarus.prasiognathos 0.530023 -0.083433 0.10007 0.193815 0.15194
## Scarus.psittacus 0.679511 -0.194834 0.08906 0.092821 -0.04585
## Scarus.quoyi 0.569658 0.033128 -0.08175 0.055416 0.03527
## Scarus.rivulatus 0.505842 -0.172985 -0.13776 0.728452 0.21801
## Scarus.rubroviolaceus 0.348523 0.044175 -0.43462 -0.099810 -0.86521
## Scarus.schlegeli 0.706009 -0.069593 -0.06508 -0.179623 0.01703
## Scarus.spinus 0.578802 -0.064642 0.07582 0.346024 -0.54801
## Scarus.tricolor 0.657465 0.174872 0.22802 0.158247 0.01095
## Scarus.xanthopleura 0.463790 -0.088127 -0.45253 -0.403154 -0.79054
## Siganus.argenteus -0.146839 0.775781 0.35105 -0.523297 1.41224
## Siganus.canaliculatus 0.006319 0.891053 0.23832 -0.480920 0.62514
## Siganus.corallinus -0.080446 0.014277 -0.30966 -0.747394 0.25946
## Siganus.doliatus -0.123483 0.358985 -0.75453 -0.166010 0.41527
## Siganus.fuscescens -0.147512 0.791480 -0.30559 -0.184150 0.26082
## Siganus.guttatus -0.470678 0.617754 -1.56597 0.639240 -0.32156
## Siganus.javus -0.093929 0.249917 -0.57795 -0.362118 0.71322
## Siganus.lineatus -0.148844 0.383187 -0.70603 -0.197787 0.10004
## Siganus.niger -0.249042 0.298231 -0.43268 -0.475022 -0.55285
## Siganus.puellus 0.054996 0.429036 -0.11193 -0.771388 -0.05215
## Siganus.punctatissimus -0.479031 0.650957 -0.98195 0.597733 -0.13505
## Siganus.punctatus -0.302639 0.366550 -0.25728 0.100289 0.56360
## Siganus.randalli -0.098522 1.047577 -0.77982 0.130004 -1.27195
## Siganus.spinus -0.052700 1.388201 -0.02476 -0.082678 -0.03378
## Siganus.stellatus -0.287467 0.075813 -0.10246 0.096600 1.12423
## Siganus.vermiculatus -0.016556 0.041327 -0.67886 -0.307382 0.40666
## Zebrasoma.flavescens -0.649717 -0.639305 -0.13111 0.138910 -0.59445
## Zebrasoma.rostratum -0.094915 -1.611350 0.81659 -1.449383 -0.63718
## Zebrasoma.scopas -0.369763 -1.103403 -0.20763 -0.361144 0.13989
## Zebrasoma.velifer -0.854489 -0.265766 0.20787 0.740915 -0.45596
traits.princomp <- princomp(herbivore.data, cor = TRUE)
summary(traits.princomp)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## Standard deviation 1.6874658 1.0489581 0.7972277 0.5220149 0.37957180
## Proportion of Variance 0.5695081 0.2200626 0.1271144 0.0544999 0.02881495
## Cumulative Proportion 0.5695081 0.7895708 0.9166851 0.9711850 1.00000000
head(traits.princomp$loadings)
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## bodydepth 0.3003183 0.7075923 0.4915296 0.3368760 0.2324548
## snoutlength 0.4153583 0.4773299 -0.5821307 -0.1990198 -0.4702643
## eyediameter 0.4713013 -0.3921013 0.4627583 0.1566362 -0.6208474
## headlength -0.4828475 0.3127241 0.4261689 -0.5776465 -0.3921307
## peduncle -0.5310553 0.1411716 -0.1541336 0.6990675 -0.4308112
head(traits.princomp$scores)
## Comp.1 Comp.2 Comp.3 Comp.4
## Acanthurus.achilles 1.9300205 0.4299713 0.69181495 0.89886735
## Acanthurus.albipectoralis 0.6765503 -0.2666386 -0.29735111 -0.03922435
## Acanthurus.auranticavus 1.1391490 0.5083784 -0.83405828 0.33970452
## Acanthurus.blochii 0.7513157 0.9077271 0.11383492 0.31253132
## Acanthurus.dussumieri 1.6006690 1.2910868 -0.04033546 0.35617894
## Acanthurus.fowleri 1.5961977 0.7928034 -0.98649475 0.14741076
## Comp.5
## Acanthurus.achilles -0.20616924
## Acanthurus.albipectoralis 0.49404090
## Acanthurus.auranticavus 0.34445371
## Acanthurus.blochii 0.38090308
## Acanthurus.dussumieri 0.03957323
## Acanthurus.fowleri 0.24692882
# just calling biplot() produces a very ugly plot with very little information
biplot(trait.pca)
- gross…
# calling screeplot provides some useful information though
screeplot(trait.pca)
biplot(trait.pca,
display = c("sites", "species"),
type = c("text", "text"),
col = c("black", "grey"))
# make the basic plot
biplot(trait.pca,
type = c("text", "points"),
col = c("black", "grey"))
# add convex hulls around the points by family
ordihull(trait.pca,
group = herbivore.meta$family,
col = c("blue", "goldenrod2", "forestgreen", "orchid"))
# add legend
fam.names <- sort(unique(herbivore.meta$family))
legend("topright",
col = c("blue", "goldenrod2", "forestgreen", "orchid"),
lty = 1,
legend = fam.names)
- not the worst plot, not the best plot, and generally a pain in the neck to work with (at least IMO).
# the easiest way to get our summary is to create an object and then extract the required element
pca.summary <- summary(trait.pca)
sp.scores <- as.data.frame(pca.summary$sites) %>% # remember that our species are the sites
mutate(gen.spe = rownames(.)) %>%
inner_join(herbivore.meta) # because we are smart and efficient, we take the opportunity to merge the scores of species on the PC axes with our metadata
## Joining, by = "gen.spe"
head(sp.scores)
## PC1 PC2 PC3 PC4 PC5
## 1 -0.5492562 -0.1968472 -0.4167308 0.82691471 -0.26084223
## 2 -0.1925365 0.1220711 0.1791163 -0.03608451 0.62505315
## 3 -0.3241855 -0.2327432 0.5024143 0.31251181 0.43579767
## 4 -0.2138137 -0.4155709 -0.0685711 0.28751377 0.48191286
## 5 -0.4555274 -0.5910787 0.0242970 0.32766748 0.05006746
## 6 -0.4542550 -0.3629571 0.5942380 0.13561081 0.31241064
## gen.spe family genus species
## 1 Acanthurus.achilles Acanthuridae Acanthurus achilles
## 2 Acanthurus.albipectoralis Acanthuridae Acanthurus albipectoralis
## 3 Acanthurus.auranticavus Acanthuridae Acanthurus auranticavus
## 4 Acanthurus.blochii Acanthuridae Acanthurus blochii
## 5 Acanthurus.dussumieri Acanthuridae Acanthurus dussumieri
## 6 Acanthurus.fowleri Acanthuridae Acanthurus fowleri
trait.loadings <- as.data.frame(pca.summary$species) %>% # remember that our traits are the species lol
mutate(trait = rownames(.)) # no need to join anything
head(trait.loadings)
## PC1 PC2 PC3 PC4 PC5 trait
## bodydepth -1.049594 -1.5372541 -0.8115897 0.3642146 0.1827414 bodydepth
## snoutlength -1.451651 -1.0370060 0.9611858 -0.2151709 -0.3696923 snoutlength
## eyediameter -1.647169 0.8518456 -0.7640840 0.1693478 -0.4880713 eyediameter
## headlength 1.687522 -0.6793976 -0.7036693 -0.6245244 -0.3082686 headlength
## peduncle 1.856005 -0.3066973 0.2544979 0.7557992 -0.3386767 peduncle
pca.ggplot <- ggplot(data = sp.scores, aes(x = PC1, y = PC2)) +
geom_point(aes(color = family)) +
theme_bw() +
scale_color_fish_d(option = "Acanthurus_sohal")
pca.ggplot
pca.ggplot.pretty <- ggplot(data = sp.scores, aes(x = PC1, y = PC2)) + # work with species scores first
geom_point(aes(fill = family, shape = family), color = "grey", size = 2.5) + # add points to plot
geom_segment(data = trait.loadings, aes(x = 0, xend = PC1/2, y = 0, yend = PC2/2), lwd = 0.1) + # this is a new geom that allows you to draw segments like the ones for the loadings above
geom_text(data = trait.loadings, aes(x = PC1/2, y = PC2/2+0.1, label = trait), size = 3) + # this adds text to the labels
theme_bw() +
scale_fill_fish_d(option = "Acanthurus_sohal") +
scale_x_continuous(limits = c(-1,1)) +
scale_shape_manual(values = c(21:24)) +
theme(legend.position = "top") +
xlab("PC1 (57.0%)") + # good practice to indicate variance explained
ylab("PC2 (22.0%)")
pca.ggplot.pretty
# using the species scores, we can use the slice() function (native to dplyr) and the chull() function to caluclate each family's convex hull. Note the group_by argument!
family.hulls <- sp.scores %>%
group_by(family) %>%
slice(chull(PC1, PC2))
# now we can add the convex hulls
pca.plot.pretty.chulls <- pca.ggplot.pretty +
geom_polygon(data = family.hulls, aes(x = PC1, y = PC2, fill = family), alpha = 0.2, show.legend = F)
pca.plot.pretty.chulls
- and look how pretty that is! Now let’s see what you can do!
For this exercise, we will switch to a different dataset. You have a dataset called “fishcoms_lizardisland.csv” in your file folder. This dataset contains counts of reef fishes at fourteen sites around Lizard Island, as per this map below.
As you can see, the different sites have different symbols which indicate that they have different regimes of wave action. Some are exposed, some are oblique, some are sheltered, and some are in the lagoon. This may very well structure the reef fish community. The data were collected by divers that swam transects at each site, at two distinct depths. For simplicity, we will focus on a single family, the Pomacentridae, better known as reef poodles.
Here are the goals for this exercise:
lfish <- read.csv(file = "data/fishcoms_lizardisland.csv")
lizard.sum <- lfish %>%
filter(Family == "Pomacentridae") %>%
group_by(Site, Exposure, species) %>%
summarize(abundance = sum(abundance))
## `summarise()` has grouped output by 'Site', 'Exposure'. You can override using the `.groups` argument.
lizard.wide <- lizard.sum %>%
spread(key = species, value = abundance, fill = 0)
lizard.meta <- lizard.wide[1:2]
lizard.raw <- lizard.wide %>%
ungroup() %>%
select(-c(1:2))
lizard.pca <- summary(rda(lizard.raw, scale = T))
lizard.scores <- as.data.frame(lizard.pca$sites) %>%
bind_cols(lizard.meta)
lizard.hulls <- lizard.scores %>%
group_by(Exposure) %>%
slice(chull(PC1, PC2))
lizard.loadings <- as.data.frame(lizard.pca$species) %>%
mutate(species = rownames(.))
lizard.plot <- ggplot() +
geom_point(data = lizard.scores, aes(x = PC1, y = PC2, color = Exposure), size = 2) +
geom_polygon(data = lizard.hulls, aes(x = PC1, PC2, fill = Exposure), alpha = 0.5) +
geom_segment(data = lizard.loadings, aes(x = 0, xend = PC1*3,
y = 0, yend = PC2*3), lwd = 0.1) +
geom_text(data = lizard.loadings, aes(x = PC1*3, y = PC2*3, label = species), size = 3) +
scale_fill_fish_d(option = "Stegastes_nigricans", end = 0.8) +
scale_color_fish_d(option = "Stegastes_nigricans", end = 0.8) +
theme_bw() +
xlab("PC1 (21.8%)") +
ylab("PC2 (15.5%)")
lizard.plot
head(lizard.wide) #lots of 0s in these data, so we will try our luck with the Hellinger transformation
## # A tibble: 6 × 57
## # Groups: Site, Exposure [6]
## Site Exposure abu.sexs abu.whit acn.poly amb.aure amb.cura amb.leuc amp.akin
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Big V… Shelter… 0 4 23 0 57 7 3
## 2 Blue … Lagoon 4 0 35 0 307 1 0
## 3 Bommi… Oblique 0 0 62 0 57 21 0
## 4 Cocon… Exposed 0 45 181 0 64 13 0
## 5 Grani… Shelter… 1 0 9 0 1 0 0
## 6 LTMP1 Exposed 1 268 72 0 54 19 0
## # … with 48 more variables: amp.clar <dbl>, amp.mela <dbl>, amp.peri <dbl>,
## # chr.alis <dbl>, chr.lepi <dbl>, chr.marg <dbl>, chr.tern <dbl>,
## # chr.viri <dbl>, chr.webe <dbl>, chr.xant <dbl>, chy.cyan <dbl>,
## # chy.flav <dbl>, chy.rex <dbl>, chy.roll <dbl>, chy.talb <dbl>,
## # das.arua <dbl>, das.reti <dbl>, das.trim <dbl>, dis.mela <dbl>,
## # dis.pers <dbl>, dis.pros <dbl>, dis.pseu <dbl>, hgy.plag <dbl>,
## # neg.mela <dbl>, neg.nigr <dbl>, neo.azys <dbl>, neo.cyan <dbl>, …
lizard.hell <- decostand(lizard.raw, method = "hellinger") # the decostand function is native to vegan and allows for the implomentation of transformations like the Hellinger
head(lizard.hell)
## abu.sexs abu.whit acn.poly amb.aure amb.cura amb.leuc amp.akin
## 1 0.00000000 0.04563166 0.1094209 0 0.17225576 0.06036502 0.03951818
## 2 0.04161252 0.00000000 0.1230915 0 0.36455512 0.02080626 0.00000000
## 3 0.00000000 0.00000000 0.2353861 0 0.22569523 0.13699181 0.00000000
## 4 0.00000000 0.18284527 0.3667049 0 0.21805571 0.09827638 0.00000000
## 5 0.03634562 0.00000000 0.1090369 0 0.03634562 0.00000000 0.00000000
## 6 0.02358333 0.38607578 0.2001112 0 0.17330139 0.10279736 0.00000000
## amp.clar amp.mela amp.peri chr.alis chr.lepi chr.marg chr.tern
## 1 0.03951818 0.00000000 0 0.07903636 0.00000000 0.00000000 0.38920894
## 2 0.00000000 0.00000000 0 0.00000000 0.00000000 0.00000000 0.02942449
## 3 0.00000000 0.05978813 0 0.00000000 0.00000000 0.00000000 0.13369032
## 4 0.00000000 0.00000000 0 0.08619409 0.00000000 0.00000000 0.00000000
## 5 0.03634562 0.00000000 0 0.00000000 0.10903685 0.03634562 0.05140047
## 6 0.00000000 0.00000000 0 0.00000000 0.03335187 0.00000000 0.08503091
## chr.viri chr.webe chr.xant chy.cyan chy.flav chy.rex chy.roll
## 1 0.20279155 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.07903636
## 2 0.06579517 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.22019275
## 3 0.11577921 0.00000000 0.00000000 0.00000000 0.00000000 0.04227659 0.10355607
## 4 0.00000000 0.11238334 0.00000000 0.00000000 0.00000000 0.04721045 0.07211515
## 5 0.06295246 0.05140047 0.00000000 0.00000000 0.03634562 0.00000000 0.14538247
## 6 0.11553426 0.21355615 0.06239563 0.04084753 0.00000000 0.03335187 0.06239563
## chy.talb das.arua das.reti das.trim dis.mela dis.pers dis.pros
## 1 0.00000000 0.09679938 0.1020355 0.00000000 0.03226646 0.03226646 0.06036502
## 2 0.00000000 0.05504819 0.0000000 0.00000000 0.07207500 0.04161252 0.09978332
## 3 0.00000000 0.17685567 0.1232564 0.00000000 0.08455318 0.10355607 0.07322520
## 4 0.06676565 0.04721045 0.1907987 0.03854717 0.02725696 0.00000000 0.02725696
## 5 0.03634562 0.03634562 0.4794316 0.14076597 0.00000000 0.00000000 0.03634562
## 6 0.08169506 0.00000000 0.1131017 0.02358333 0.00000000 0.00000000 0.00000000
## dis.pseu hgy.plag neg.mela neg.nigr neo.azys neo.cyan pgy.dick
## 1 0.00000000 0.05101775 0.04563166 0.00000000 0.2200279 0.00000000 0.00000000
## 2 0.00000000 0.13322506 0.00000000 0.10192944 0.2514030 0.02942449 0.00000000
## 3 0.05978813 0.00000000 0.00000000 0.11957626 0.2296207 0.00000000 0.00000000
## 4 0.00000000 0.00000000 0.00000000 0.14163134 0.2542360 0.00000000 0.00000000
## 5 0.00000000 0.00000000 0.03634562 0.00000000 0.2298699 0.00000000 0.00000000
## 6 0.00000000 0.00000000 0.02358333 0.08503091 0.2028715 0.00000000 0.03335187
## pgy.lacr pom.adel pom.ambo pom.bank pom.brac pom.chry pom.coel
## 1 0.04563166 0.33063282 0.2488913 0.00000000 0.2115853 0.03951818 0.06844750
## 2 0.00000000 0.20806259 0.1995666 0.00000000 0.1638287 0.02942449 0.00000000
## 3 0.32196892 0.19373568 0.4087952 0.02989406 0.3781333 0.16644310 0.11957626
## 4 0.15176035 0.09827638 0.3022943 0.10902785 0.3393465 0.09442089 0.24226519
## 5 0.00000000 0.09616147 0.3964839 0.00000000 0.2838684 0.10280093 0.32508509
## 6 0.17489867 0.06239563 0.1247913 0.02358333 0.4751910 0.02358333 0.04716666
## pom.gram pom.lepi pom.molu pom.naga pom.pavo pom.phil pom.vaiu
## 1 0.00000000 0.10455528 0.6147586 0.18675564 0.00000000 0 0.00000000
## 2 0.19406787 0.00000000 0.7222500 0.02942449 0.00000000 0 0.00000000
## 3 0.02989406 0.07322520 0.3804893 0.13030520 0.05978813 0 0.00000000
## 4 0.00000000 0.33825002 0.3259459 0.11238334 0.00000000 0 0.05451393
## 5 0.00000000 0.09616147 0.4406672 0.24917300 0.00000000 0 0.00000000
## 6 0.05776713 0.32847780 0.4251546 0.09433333 0.00000000 0 0.00000000
## pom.ward pre.biac ste.apic ste.fasc ste.livi ste.nigr
## 1 0.12496746 0.03226646 0.0000000 0 0 0.03226646
## 2 0.06241878 0.02942449 0.0360375 0 0 0.08322504
## 3 0.06684516 0.00000000 0.1195763 0 0 0.05177804
## 4 0.16802321 0.00000000 0.1828453 0 0 0.00000000
## 5 0.05140047 0.00000000 0.0000000 0 0 0.00000000
## 6 0.12254259 0.00000000 0.1633901 0 0 0.00000000
lizard.pca <- summary(rda(lizard.hell, scale = T))
lizard.scores <- as.data.frame(lizard.pca$sites) %>%
bind_cols(lizard.meta)
lizard.hulls <- lizard.scores %>%
group_by(Exposure) %>%
slice(chull(PC1, PC2))
lizard.loadings <- as.data.frame(lizard.pca$species) %>%
mutate(species = rownames(.))
lizard.plot <- ggplot() +
geom_point(data = lizard.scores, aes(x = PC1, y = PC2, color = Exposure), size = 2) +
geom_polygon(data = lizard.hulls, aes(x = PC1, PC2, fill = Exposure), alpha = 0.5) +
geom_segment(data = lizard.loadings, aes(x = 0, xend = PC1*3,
y = 0, yend = PC2*3), lwd = 0.1) +
geom_text(data = lizard.loadings, aes(x = PC1*3, y = PC2*3, label = species), size = 3) +
scale_fill_fish_d(option = "Stegastes_nigricans", end = 0.8) +
scale_color_fish_d(option = "Stegastes_nigricans", end = 0.8) +
theme_bw() +
xlab("PC1 (21.8%)") +
ylab("PC2 (17.8%)") +
ggtitle("Hellinger transformed PCA, scale = T")
lizard.plot
lizard.pca <- summary(rda(lizard.hell, scale = F))
lizard.scores <- as.data.frame(lizard.pca$sites) %>%
bind_cols(lizard.meta)
lizard.hulls <- lizard.scores %>%
group_by(Exposure) %>%
slice(chull(PC1, PC2))
lizard.loadings <- as.data.frame(lizard.pca$species) %>%
mutate(species = rownames(.))
lizard.plot <- ggplot() +
geom_point(data = lizard.scores, aes(x = PC1, y = PC2, color = Exposure), size = 2) +
geom_polygon(data = lizard.hulls, aes(x = PC1, PC2, fill = Exposure), alpha = 0.5) +
geom_segment(data = lizard.loadings, aes(x = 0, xend = PC1*2,
y = 0, yend = PC2*2), lwd = 0.1) +
geom_text(data = lizard.loadings, aes(x = PC1*2, y = PC2*2, label = species), size = 3) +
scale_fill_fish_d(option = "Stegastes_nigricans", end = 0.8) +
scale_color_fish_d(option = "Stegastes_nigricans", end = 0.8) +
theme_bw() +
xlab("PC1 (30.3%)") +
ylab("PC2 (20.5%)") +
ggtitle("Hellinger transformed PCA, scale = F")
lizard.plot
# let's get the whole dataset in here and this time, we'll actually go with separate transects
lizard.sum <- lfish %>%
group_by(transect, Site, Exposure, species) %>%
summarize(abundance = sum(abundance)) # oof, almost 2000 datapoints
## `summarise()` has grouped output by 'transect', 'Site', 'Exposure'. You can override using the `.groups` argument.
lizard.wide <- lizard.sum %>%
spread(key = species, value = abundance, fill = 0)
lizard.meta <- lizard.wide[1:3]
lizard.raw <- lizard.wide %>%
ungroup() %>%
select(-c(1:3))
# to do a distance based ordination, we first have to create a distance matrix
# we can start with Euclidean distance
lizard.dist <- vegdist(lizard.raw, method = "euclidean")
# now we can run the PCOA on it using the cmdscale() function
lizard.pcoa <- cmdscale(lizard.dist, k = 2, eig = T)
# take a look at the object
lizard.pcoa # everything OK?
## $points
## [,1] [,2]
## [1,] -66.251679 97.113805
## [2,] -74.354068 299.206129
## [3,] -46.409989 -70.050669
## [4,] -60.496306 -63.602352
## [5,] -65.389163 -84.771638
## [6,] 31.270093 -39.611060
## [7,] -5.058303 -49.659915
## [8,] 210.261865 57.941709
## [9,] -61.578473 -80.738983
## [10,] -65.646894 57.279013
## [11,] -39.331764 -91.435574
## [12,] -61.447960 -117.869057
## [13,] -50.705426 -41.871704
## [14,] -63.375411 -20.437792
## [15,] -73.007675 277.288075
## [16,] -72.356452 299.193955
## [17,] -67.358924 -25.193708
## [18,] -9.041048 -75.986071
## [19,] -66.053514 -72.191072
## [20,] -57.555029 -39.525218
## [21,] -60.081643 -42.469412
## [22,] 180.286954 66.597098
## [23,] -63.940859 -102.169830
## [24,] -68.643888 51.620334
## [25,] -20.009720 -90.651801
## [26,] -67.127010 -110.216501
## [27,] -23.727114 42.857085
## [28,] -59.007288 -57.133650
## [29,] -69.787838 77.414388
## [30,] -68.332435 324.044431
## [31,] 73.566875 -60.090759
## [32,] -63.164461 -68.028566
## [33,] -64.776741 -31.526106
## [34,] 7.647274 3.153391
## [35,] 1542.545541 4.673760
## [36,] -69.134498 30.468271
## [37,] -65.722178 -42.349124
## [38,] -64.350336 92.121520
## [39,] -66.940594 -97.757583
## [40,] -36.788930 -112.672261
## [41,] -42.491235 -19.751355
## [42,] -66.133758 -73.211204
##
## $eig
## [1] 2.589854e+06 5.405792e+05 2.307861e+05 1.071581e+05 8.839519e+04
## [6] 7.418345e+04 6.043663e+04 4.247760e+04 3.790302e+04 3.421018e+04
## [11] 2.483673e+04 2.250917e+04 2.148858e+04 1.766619e+04 1.657884e+04
## [16] 1.376241e+04 1.093085e+04 1.063328e+04 9.886673e+03 7.912177e+03
## [21] 7.733002e+03 6.837282e+03 5.694501e+03 4.407403e+03 4.094321e+03
## [26] 3.766605e+03 2.786439e+03 2.598945e+03 2.502257e+03 1.981161e+03
## [31] 1.875280e+03 1.688176e+03 1.559685e+03 1.311434e+03 1.091187e+03
## [36] 1.014254e+03 9.228482e+02 8.155231e+02 6.412096e+02 5.979630e+02
## [41] 3.866763e+02 2.876285e-11
##
## $x
## NULL
##
## $ac
## [1] 0
##
## $GOF
## [1] 0.7793944 0.7793944
lizard.scores <- as.data.frame(lizard.pcoa$points) %>%
bind_cols(lizard.meta)
lizard.hulls <- lizard.scores %>%
group_by(Exposure) %>%
slice(chull(V1, V2))
lizard.plot <- ggplot() +
geom_point(data = lizard.scores, aes(x = V1, y = V2, color = Exposure), size = 2) +
geom_polygon(data = lizard.hulls, aes(x = V1, V2, fill = Exposure), alpha = 0.5) +
scale_fill_fish_d(option = "Stegastes_nigricans", end = 0.8) +
scale_color_fish_d(option = "Stegastes_nigricans", end = 0.8) +
theme_bw() +
xlab("PCo1") +
ylab("PCo2")
lizard.plot
lizard.dist <- vegdist(lizard.raw, method = "bray") # let's try the Bray-Curtis dissimilarity metric, which is a semi-metric dissimilarity measure designed to handle abundance data
# now we can run the PCOA on it using the cmdscale() function
lizard.pcoa <- cmdscale(lizard.dist, k = 2, eig = T)
lizard.pcoa #uh-oh, better correct that
## $points
## [,1] [,2]
## [1,] -0.169343902 0.111362282
## [2,] -0.451281604 0.114325972
## [3,] 0.159803853 0.049922107
## [4,] 0.110401192 -0.143316894
## [5,] 0.262134305 0.270832648
## [6,] 0.013867453 -0.322687479
## [7,] 0.015234736 -0.264395699
## [8,] -0.211178112 -0.138801631
## [9,] 0.168286115 -0.043361153
## [10,] -0.364013023 0.043814726
## [11,] 0.050287582 -0.224119652
## [12,] 0.168140226 0.128265348
## [13,] 0.007677336 -0.145804213
## [14,] 0.055376418 -0.013336868
## [15,] -0.210262137 0.075344861
## [16,] -0.427170514 0.040946293
## [17,] 0.090801781 0.017532823
## [18,] 0.074555675 -0.222151821
## [19,] 0.230138156 0.264666597
## [20,] 0.025580231 -0.291084943
## [21,] 0.020392030 -0.254786126
## [22,] -0.213405343 -0.080691422
## [23,] 0.253941794 -0.028540411
## [24,] -0.357736333 0.074330479
## [25,] 0.141707196 -0.125316972
## [26,] 0.211729977 0.324698546
## [27,] -0.025387340 -0.043907012
## [28,] 0.139600069 -0.008031405
## [29,] -0.140565449 0.193857385
## [30,] -0.390984455 0.176500523
## [31,] 0.114021352 -0.101267519
## [32,] 0.150646805 -0.042847986
## [33,] 0.163748562 0.163380808
## [34,] 0.045020697 -0.185569191
## [35,] -0.041610168 -0.203567529
## [36,] -0.167252931 0.104032274
## [37,] 0.145421117 0.082719968
## [38,] -0.330250941 0.135962648
## [39,] 0.178077348 0.033848600
## [40,] 0.234611185 0.287383041
## [41,] 0.073013160 0.049278691
## [42,] 0.196225903 0.140579304
##
## $eig
## [1] 1.723784e+00 1.145224e+00 6.463910e-01 5.537202e-01 4.762933e-01
## [6] 3.911931e-01 3.342039e-01 3.073085e-01 2.848823e-01 2.359893e-01
## [11] 2.240308e-01 1.880727e-01 1.847469e-01 1.440149e-01 1.391043e-01
## [16] 1.303950e-01 1.281280e-01 1.209120e-01 1.039267e-01 9.532212e-02
## [21] 8.271850e-02 7.611610e-02 6.645071e-02 6.189100e-02 5.395885e-02
## [26] 5.074267e-02 4.549638e-02 3.884304e-02 3.297210e-02 2.469777e-02
## [31] 1.950817e-02 1.266073e-02 1.039242e-02 7.941789e-03 4.461808e-04
## [36] 6.591949e-17 -7.145177e-03 -9.277816e-03 -1.362953e-02 -1.981142e-02
## [41] -2.596988e-02 -2.815455e-02
##
## $x
## NULL
##
## $ac
## [1] 0
##
## $GOF
## [1] 0.3479075 0.3523507
lizard.pcoa <- pcoa(lizard.dist, correction = "cailliez")
lizard.scores <- as.data.frame(lizard.pcoa$vectors) %>%
bind_cols(lizard.meta)
lizard.hulls <- lizard.scores %>%
group_by(Exposure) %>%
slice(chull(Axis.1, Axis.2))
lizard.plot.pcoa <- ggplot() +
geom_point(data = lizard.scores, aes(x = Axis.1, y = Axis.2, color = Exposure), size = 2) +
geom_polygon(data = lizard.hulls, aes(x = Axis.1, Axis.2, fill = Exposure), alpha = 0.5) +
scale_fill_fish_d(option = "Stegastes_nigricans", end = 0.8) +
scale_color_fish_d(option = "Stegastes_nigricans", end = 0.8) +
theme_bw() +
xlab("PCo1") +
ylab("PCo2") +
ggtitle("PCoA")
lizard.plot.pcoa
lizard.mds <- metaMDS(comm = lizard.raw, k = 2, trymax = 1000, distance = "bray") # we're specifying the data, the dimensions, the number of tries we want to give mds to find a solution, and the distance matrix we want to use
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1771763
## Run 1 stress 0.1763733
## ... New best solution
## ... Procrustes: rmse 0.03636571 max resid 0.2140796
## Run 2 stress 0.1809323
## Run 3 stress 0.1766939
## ... Procrustes: rmse 0.03845272 max resid 0.2164014
## Run 4 stress 0.1809149
## Run 5 stress 0.3998463
## Run 6 stress 0.245323
## Run 7 stress 0.1810183
## Run 8 stress 0.2489234
## Run 9 stress 0.2231666
## Run 10 stress 0.2427081
## Run 11 stress 0.2625846
## Run 12 stress 0.1809499
## Run 13 stress 0.2246407
## Run 14 stress 0.1810182
## Run 15 stress 0.1825524
## Run 16 stress 0.1766939
## ... Procrustes: rmse 0.03845159 max resid 0.2164016
## Run 17 stress 0.1810182
## Run 18 stress 0.1810182
## Run 19 stress 0.1808804
## Run 20 stress 0.1824457
## Run 21 stress 0.2050438
## Run 22 stress 0.1810182
## Run 23 stress 0.1766939
## ... Procrustes: rmse 0.03845482 max resid 0.216404
## Run 24 stress 0.1976005
## Run 25 stress 0.1766785
## ... Procrustes: rmse 0.03771567 max resid 0.2149804
## Run 26 stress 0.1764021
## ... Procrustes: rmse 0.008987061 max resid 0.04473765
## Run 27 stress 0.1781402
## Run 28 stress 0.1766785
## ... Procrustes: rmse 0.03771412 max resid 0.2149778
## Run 29 stress 0.1824456
## Run 30 stress 0.2477244
## Run 31 stress 0.1810182
## Run 32 stress 0.2292575
## Run 33 stress 0.2501318
## Run 34 stress 0.1809149
## Run 35 stress 0.1763732
## ... New best solution
## ... Procrustes: rmse 2.545639e-05 max resid 0.0001050924
## ... Similar to previous best
## *** Solution reached
lizard.mds #lets see the results
##
## Call:
## metaMDS(comm = lizard.raw, distance = "bray", k = 2, trymax = 1000)
##
## global Multidimensional Scaling using monoMDS
##
## Data: wisconsin(sqrt(lizard.raw))
## Distance: bray
##
## Dimensions: 2
## Stress: 0.1763732
## Stress type 1, weak ties
## Two convergent solutions found after 35 tries
## Scaling: centring, PC rotation, halfchange scaling
## Species: expanded scores based on 'wisconsin(sqrt(lizard.raw))'
stressplot(lizard.mds)
lizard.scores <- as.data.frame(lizard.mds$points) %>%
bind_cols(lizard.meta)
lizard.hulls <- lizard.scores %>%
group_by(Exposure) %>%
slice(chull(MDS1, MDS2))
lizard.plot.mds <- ggplot() +
geom_point(data = lizard.scores, aes(x = MDS1, y = MDS2, color = Exposure), size = 2) +
geom_polygon(data = lizard.hulls, aes(x = MDS1, MDS2, fill = Exposure), alpha = 0.5) +
scale_fill_fish_d(option = "Stegastes_nigricans", end = 0.8) +
scale_color_fish_d(option = "Stegastes_nigricans", end = 0.8) +
theme_bw() +
xlab("MDS1") +
ylab("MDS2") +
ggtitle("nMDS")
lizard.plot.mds
pcoamds <- lizard.plot.pcoa / lizard.plot.mds
pcoamds
- we can see that despite using the same distance metric (Bray-Curtis), the nMDS produces a cleaner, better separation among the transects than the PCoA by taking out abundance-based differences among species and running on ranks only - and, surprise: we can actually bring our loadings back in if we wanted to (I don’t think we do). But you’ll find the species hidden in the nmds object - phew, that’s a lot to take in
For this exercise, we will use the same dataset, but this time, I would like you to see whether there is a difference in fish communities between the two years, 2011 and 2015. Using the fishcoms_lizardisland.csv dataset:
lizard.sum <- lfish %>%
group_by(Site, transect, year, species) %>%
summarize(abundance = sum(abundance))
## `summarise()` has grouped output by 'Site', 'transect', 'year'. You can override using the `.groups` argument.
lizard.wide <- lizard.sum %>%
spread(key = species, value = abundance, fill = 0)
lizard.meta <- lizard.wide[1:3]
lizard.raw <- lizard.wide %>%
ungroup() %>%
select(-c(1:3))
lizard.d.euc <- vegdist(lizard.raw, method = "euclidean")
lizard.d.bray <- vegdist(lizard.raw, method = "bray")
lizard.pcoa.euc <- pcoa(lizard.d.euc, correction = "cailliez")
lizard.pcoa.bray <- pcoa(lizard.d.bray, correction = "cailliez")
lizard.scores.e <- as.data.frame(lizard.pcoa.euc$vectors) %>%
bind_cols(lizard.meta)
lizard.hulls.e <- lizard.scores.e %>%
group_by(year) %>%
slice(chull(Axis.1, Axis.2))
lizard.plot.e <- ggplot() +
geom_point(data = lizard.scores.e, aes(x = Axis.1, y = Axis.2, color = as.factor(year)), size = 2) +
geom_polygon(data = lizard.hulls.e, aes(x = Axis.1, Axis.2, fill = as.factor(year)), alpha = 0.5) +
scale_fill_fish_d(option = "Stegastes_nigricans", end = 0.8) +
scale_color_fish_d(option = "Stegastes_nigricans", end = 0.8) +
theme_bw() +
xlab("PCo1 (62.9%)") +
ylab("PCo2 (12.0%)") +
ggtitle("PCoA Euclidean")
lizard.plot.e # disgusting
lizard.scores.b <- as.data.frame(lizard.pcoa.bray$vectors) %>%
bind_cols(lizard.meta)
lizard.hulls.b <- lizard.scores.b %>%
group_by(year) %>%
slice(chull(Axis.1, Axis.2))
lizard.plot.b <- ggplot() +
geom_point(data = lizard.scores.b, aes(x = Axis.1, y = Axis.2, color = as.factor(year)), size = 2) +
geom_polygon(data = lizard.hulls.b, aes(x = Axis.1, Axis.2, fill = as.factor(year)), alpha = 0.5) +
scale_fill_fish_d(option = "Stegastes_nigricans", end = 0.8) +
scale_color_fish_d(option = "Stegastes_nigricans", end = 0.8) +
theme_bw() +
xlab("PCo1 (12.5%)") +
ylab("PCo2 (8.4%)") +
ggtitle("PCoA Bray")
lizard.plot.b #better
# now do the nMDS
lizard.mds <- metaMDS(comm = lizard.raw, k = 2, trymax = 1000, distance = "bray") # we're specifying the data, the dimensions, the number of tries we want to give mds to find a solution, and the distance matrix we want to use
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.2318191
## Run 1 stress 0.2328461
## Run 2 stress 0.2318219
## ... Procrustes: rmse 0.001243229 max resid 0.009462209
## ... Similar to previous best
## Run 3 stress 0.2328704
## Run 4 stress 0.2328461
## Run 5 stress 0.2318425
## ... Procrustes: rmse 0.003739858 max resid 0.02703135
## Run 6 stress 0.2318197
## ... Procrustes: rmse 0.003265873 max resid 0.02712335
## Run 7 stress 0.2770384
## Run 8 stress 0.2483551
## Run 9 stress 0.2318724
## ... Procrustes: rmse 0.005264827 max resid 0.03847742
## Run 10 stress 0.2328583
## Run 11 stress 0.2318219
## ... Procrustes: rmse 0.001242333 max resid 0.009449033
## ... Similar to previous best
## Run 12 stress 0.23187
## ... Procrustes: rmse 0.006067959 max resid 0.03862904
## Run 13 stress 0.231822
## ... Procrustes: rmse 0.001243597 max resid 0.009434762
## ... Similar to previous best
## Run 14 stress 0.2318797
## ... Procrustes: rmse 0.005867234 max resid 0.03853855
## Run 15 stress 0.2539136
## Run 16 stress 0.2318701
## ... Procrustes: rmse 0.006076861 max resid 0.03874786
## Run 17 stress 0.2318197
## ... Procrustes: rmse 0.003266207 max resid 0.02712522
## Run 18 stress 0.265808
## Run 19 stress 0.2577595
## Run 20 stress 0.2328584
## *** Solution reached
lizard.mds #lets see the results
##
## Call:
## metaMDS(comm = lizard.raw, distance = "bray", k = 2, trymax = 1000)
##
## global Multidimensional Scaling using monoMDS
##
## Data: wisconsin(sqrt(lizard.raw))
## Distance: bray
##
## Dimensions: 2
## Stress: 0.2318191
## Stress type 1, weak ties
## Two convergent solutions found after 20 tries
## Scaling: centring, PC rotation, halfchange scaling
## Species: expanded scores based on 'wisconsin(sqrt(lizard.raw))'
stressplot(lizard.mds)
lizard.scores <- as.data.frame(lizard.mds$points) %>%
bind_cols(lizard.meta)
lizard.hulls <- lizard.scores %>%
group_by(year) %>%
slice(chull(MDS1, MDS2))
lizard.plot.mds <- ggplot() +
geom_point(data = lizard.scores, aes(x = MDS1, y = MDS2, color = as.factor(year)), size = 2) +
geom_polygon(data = lizard.hulls, aes(x = MDS1, MDS2, fill = as.factor(year)), alpha = 0.5) +
scale_fill_fish_d(option = "Stegastes_nigricans", end = 0.8) +
scale_color_fish_d(option = "Stegastes_nigricans", end = 0.8) +
theme_bw() +
xlab("MDS1") +
ylab("MDS2") +
ggtitle("nMDS")
lizard.plot.mds
# to do this, let's go back to our dataset that had only pomacentrids in it
lizard.sum <- lfish %>%
filter(Family == "Pomacentridae") %>%
group_by(Site, Exposure, species) %>%
summarize(abundance = sum(abundance))
## `summarise()` has grouped output by 'Site', 'Exposure'. You can override using the `.groups` argument.
lizard.wide <- lizard.sum %>%
spread(key = species, value = abundance, fill = 0)
lizard.meta <- lizard.wide[1:2]
lizard.raw <- lizard.wide %>%
ungroup() %>%
select(-c(1:2))
# now let's try running the RDA
lizard.rda <- rda(decostand(lizard.raw, method = "hellinger") ~ lizard.meta$Exposure) # I am using the hellinger transformation to standardize my data
lizard.rda
## Call: rda(formula = decostand(lizard.raw, method = "hellinger") ~
## lizard.meta$Exposure)
##
## Inertia Proportion Rank
## Total 0.2629 1.0000
## Constrained 0.1159 0.4410 3
## Unconstrained 0.1470 0.5590 10
## Inertia is variance
##
## Eigenvalues for constrained axes:
## RDA1 RDA2 RDA3
## 0.06839 0.03660 0.01095
##
## Eigenvalues for unconstrained axes:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10
## 0.04347 0.02345 0.01862 0.01564 0.01472 0.01110 0.00783 0.00511 0.00390 0.00314
# this provides some useful information - can you interpret what it says?
# we can get even more useful statistical output, calculating both the R-squared and a p-value statistic
RsquareAdj(lizard.rda)
## $r.squared
## [1] 0.4409872
##
## $adj.r.squared
## [1] 0.2732834
anova(lizard.rda, permutations=1000)
## Permutation test for rda under reduced model
## Permutation: free
## Number of permutations: 1000
##
## Model: rda(formula = decostand(lizard.raw, method = "hellinger") ~ lizard.meta$Exposure)
## Df Variance F Pr(>F)
## Model 3 0.11594 2.6296 0.000999 ***
## Residual 10 0.14696
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# and of course, we can plot the results
sum.rda <- summary(lizard.rda)
lizard.scores <- as.data.frame(sum.rda$sites) %>%
bind_cols(lizard.meta)
lizard.hulls <- lizard.scores %>%
group_by(Exposure) %>%
slice(chull(RDA1, RDA2))
lizard.loadings <- as.data.frame(sum.rda$species) %>%
mutate(species = rownames(.))
lizard.constraint <- as.data.frame(sum.rda$centroids) %>%
add_column(Exposure = c("Exposed", "Lagoon", "Oblique", "Sheltered"))
lizard.rda.plot <- ggplot() +
geom_point(data = lizard.scores, aes(x = RDA1, y = RDA2, color = Exposure), size = 2) +
geom_polygon(data = lizard.hulls, aes(x = RDA1, RDA2, fill = Exposure), alpha = 0.5) +
geom_segment(data = lizard.loadings, aes(x = 0, xend = RDA1*2,
y = 0, yend = RDA2*2), lwd = 0.1) +
geom_text(data = lizard.loadings, aes(x = RDA1*2, y = RDA2*2, label = species), size = 3) +
geom_segment(data = lizard.constraint, aes(x = 0, xend = RDA1,
y = 0, yend = RDA2), lwd = 1, color = "grey14",
arrow = arrow(length=unit(0.30,"cm"), ends="last", type = "closed")) +
geom_text(data = lizard.constraint, aes(x = RDA1+0.05, y = RDA2+0.05, label = Exposure), size = 5, color = "grey14") +
scale_fill_fish_d(option = "Stegastes_nigricans", end = 0.8) +
scale_color_fish_d(option = "Stegastes_nigricans", end = 0.8) +
theme_bw() +
xlab("RDA1 (26.1%)") +
ylab("RDA2 (13.9%)") +
ggtitle("RDA")
lizard.rda.plot