# install the vegan package by running install.packages()
# the vegan package facilitates multivariate analyses
install.packages("vegan", repos = "http://cran.us.r-project.org")
# load the tidyverse package in your current R session
library(tidyverse)
# load the ggplot2 package in your current R session
library(ggplot2)
# load the fishualize package in your current R session
library(fishualize)
# load the vegan package in your current R session
library(vegan)
Read in datasets to use with community analysis
# morphology of herbivorous coral reef fishes, relative to standard length
herbivore.morphology <- read.csv(file = "data/herbivore_morphology.csv")
head(herbivore.morphology)
## family genus species gen.spe bodydepth
## 1 Acanthuridae Acanthurus achilles Acanthurus.achilles 0.098504108
## 2 Acanthuridae Acanthurus albipectoralis Acanthurus.albipectoralis 0.001397261
## 3 Acanthuridae Acanthurus auranticavus Acanthurus.auranticavus 0.034632323
## 4 Acanthuridae Acanthurus blochii Acanthurus.blochii 0.075265132
## 5 Acanthuridae Acanthurus dussumieri Acanthurus.dussumieri 0.100102812
## 6 Acanthuridae Acanthurus fowleri Acanthurus.fowleri 0.045968691
## snoutlength eyediameter headlength peduncle
## 1 0.4877797 0.069172106 0.2315766 -0.03889361
## 2 0.4402623 0.000622937 0.2405350 -0.06240037
## 3 0.5386490 -0.008570787 0.2263199 -0.03512062
## 4 0.4782217 -0.004914155 0.2538896 -0.02705601
## 5 0.5661867 0.015159797 0.2443877 -0.04640333
## 6 0.5950563 -0.005349450 0.2244994 -0.06197221
Ecological data is complex, oftentimes including many variables. A principal components analysis (PCA) is useful to interpret complex datasets because it reduces dimensionality but preserves variability, allowing for easier interpretation of overall trends. In other words, it reduces many variables down to a few principal components.
First, variables must be standardized to compare across variables. Without standardizing the variables on the same scale, the PCA will assume that one variable is more important than the other.
Calculate the means across the columns to determine whether the variables need to be standardized.
# calculate the means of the columns
colMeans(herbivore.morphology[5:9])
## bodydepth snoutlength eyediameter headlength peduncle
## 0.003329304 0.428684939 0.001989914 0.267733093 -0.003910865
Data normalization can be achieved with Z score standardization, which subtracts the mean of the column from each value and then divides it by the standard deviation. This results in a dataset with a mean of 0 and a standard deviation of 1, without changing the relative differences between the rows in the dataset.
Demonstration of manual data normalization
# use columns body depth and snout length
herb.practice <- herbivore.morphology %>%
select(bodydepth, snoutlength) %>%
mutate(bd.zscore = (bodydepth - mean(bodydepth))/sd(bodydepth),
sl.zscore = (snoutlength - mean(snoutlength))/sd(snoutlength))
head(herb.practice)
## bodydepth snoutlength bd.zscore sl.zscore
## 1 0.098504108 0.4877797 1.47082227 0.5194079
## 2 0.001397261 0.4402623 -0.02985761 0.1017584
## 3 0.034632323 0.5386490 0.48375384 0.9665194
## 4 0.075265132 0.4782217 1.11168937 0.4353988
## 5 0.100102812 0.5661867 1.49552849 1.2085590
## 6 0.045968691 0.5950563 0.65894499 1.4623064
# check mean and standard deviation of body depth and snout length
# means = 0 and standard deviations = 1
practice.means <- herb.practice %>%
summarize(mean.zbd = mean(bd.zscore),
mean.zsl = mean(sl.zscore),
sd.zbd = sd(bd.zscore),
sd.zsl = sd(sl.zscore))
practice.means
## mean.zbd mean.zsl sd.zbd sd.zsl
## 1 -2.520945e-17 2.095355e-16 1 1
Z score standardization can also be achieved with the scale() function.
Demonstration of scale() function on a vector
# use scale() on body depth
# be aware that scale() turns output into a matrix, which can be turned back into a vector with as.vector()
scaled.bd <- scale(herbivore.morphology$bodydepth)
head(scaled.bd)
## [,1]
## [1,] 1.47082227
## [2,] -0.02985761
## [3,] 0.48375384
## [4,] 1.11168937
## [5,] 1.49552849
## [6,] 0.65894499
# mean of standardized body depth = 0
mean(scaled.bd)
## [1] -2.405646e-17
# standard deviation of standardized body depth = 1
sd(scaled.bd)
## [1] 1
Applying the scale() function across a dataset with tidyverse
# use scale() across multiple columns with across() function in mutate()
herb.scaled <- herbivore.morphology %>%
mutate(across(where(is.numeric), ~scale(.)))
# check that mean = 0 and standard deviation = 1 for one variable - body depth
mean(herb.scaled$bodydepth)
## [1] -2.405646e-17
sd(herb.scaled$bodydepth)
## [1] 1
# use gather() across columns
herb.scaled.long <- herb.scaled %>%
gather(5:9, key = "trait", value = "value")
head(herb.scaled.long)
## family genus species gen.spe trait
## 1 Acanthuridae Acanthurus achilles Acanthurus.achilles bodydepth
## 2 Acanthuridae Acanthurus albipectoralis Acanthurus.albipectoralis bodydepth
## 3 Acanthuridae Acanthurus auranticavus Acanthurus.auranticavus bodydepth
## 4 Acanthuridae Acanthurus blochii Acanthurus.blochii bodydepth
## 5 Acanthuridae Acanthurus dussumieri Acanthurus.dussumieri bodydepth
## 6 Acanthuridae Acanthurus fowleri Acanthurus.fowleri bodydepth
## value
## 1 1.47082227
## 2 -0.02985761
## 3 0.48375384
## 4 1.11168937
## 5 1.49552849
## 6 0.65894499
# summarize means and standard deviations across all variables
herb.scaled.summary <- herb.scaled.long %>%
group_by(trait) %>%
summarize(mean = mean(value),
sd = sd(value))
head(herb.scaled.summary)
## # A tibble: 5 × 3
## trait mean sd
## <chr> <dbl> <dbl>
## 1 bodydepth -2.41e-17 1
## 2 eyediameter -1.52e-17 1
## 3 headlength -4.80e-16 1
## 4 peduncle -5.90e-18 1
## 5 snoutlength 2.10e-16 1
# use ggplot to visualize means and standard deviations of all variables
# use fishualize for color palette
trait.means <- ggplot(herb.scaled.summary, aes(x = trait, y = mean, color = trait)) +
geom_pointrange(aes(ymin = mean-sd, ymax = mean+sd)) +
geom_jitter(data = herb.scaled.long, aes(x = trait, y = value, color = trait), alpha = 0.2) +
theme_bw() +
scale_color_fish_d(option = "Acanthurus_sohal")
trait.means
The vegan package has an embedded function for data normalization, so it’s typically not necessary to perform the above data normalization.
Unlike most coding practices, which require data in long format, to perform a PCA, data need to be in a wide format. Data can be reformatted using the spread() and gather() functions.
Confirm that data are in wide format
head(herbivore.morphology)
## family genus species gen.spe bodydepth
## 1 Acanthuridae Acanthurus achilles Acanthurus.achilles 0.098504108
## 2 Acanthuridae Acanthurus albipectoralis Acanthurus.albipectoralis 0.001397261
## 3 Acanthuridae Acanthurus auranticavus Acanthurus.auranticavus 0.034632323
## 4 Acanthuridae Acanthurus blochii Acanthurus.blochii 0.075265132
## 5 Acanthuridae Acanthurus dussumieri Acanthurus.dussumieri 0.100102812
## 6 Acanthuridae Acanthurus fowleri Acanthurus.fowleri 0.045968691
## snoutlength eyediameter headlength peduncle
## 1 0.4877797 0.069172106 0.2315766 -0.03889361
## 2 0.4402623 0.000622937 0.2405350 -0.06240037
## 3 0.5386490 -0.008570787 0.2263199 -0.03512062
## 4 0.4782217 -0.004914155 0.2538896 -0.02705601
## 5 0.5661867 0.015159797 0.2443877 -0.04640333
## 6 0.5950563 -0.005349450 0.2244994 -0.06197221
str(herbivore.morphology)
## 'data.frame': 93 obs. of 9 variables:
## $ family : chr "Acanthuridae" "Acanthuridae" "Acanthuridae" "Acanthuridae" ...
## $ genus : chr "Acanthurus" "Acanthurus" "Acanthurus" "Acanthurus" ...
## $ species : chr "achilles" "albipectoralis" "auranticavus" "blochii" ...
## $ gen.spe : chr "Acanthurus.achilles" "Acanthurus.albipectoralis" "Acanthurus.auranticavus" "Acanthurus.blochii" ...
## $ bodydepth : num 0.0985 0.0014 0.0346 0.0753 0.1001 ...
## $ snoutlength: num 0.488 0.44 0.539 0.478 0.566 ...
## $ eyediameter: num 0.069172 0.000623 -0.008571 -0.004914 0.01516 ...
## $ headlength : num 0.232 0.241 0.226 0.254 0.244 ...
## $ peduncle : num -0.0389 -0.0624 -0.0351 -0.0271 -0.0464 ...
The dataset includes a few columns with metadata for each species (family, genus, and species). Multivariate functions can only be performed on numeric data, so the character data must be isolated.
Set row names to keep track of which row belongs to which species
herbivore.meta <- herbivore.morphology[1:4]
herbivore.data <- herbivore.morphology[5:9]
rownames(herbivore.data) <- herbivore.meta$gen.spe
head(herbivore.data)
## bodydepth snoutlength eyediameter headlength
## Acanthurus.achilles 0.098504108 0.4877797 0.069172106 0.2315766
## Acanthurus.albipectoralis 0.001397261 0.4402623 0.000622937 0.2405350
## Acanthurus.auranticavus 0.034632323 0.5386490 -0.008570787 0.2263199
## Acanthurus.blochii 0.075265132 0.4782217 -0.004914155 0.2538896
## Acanthurus.dussumieri 0.100102812 0.5661867 0.015159797 0.2443877
## Acanthurus.fowleri 0.045968691 0.5950563 -0.005349450 0.2244994
## peduncle
## Acanthurus.achilles -0.03889361
## Acanthurus.albipectoralis -0.06240037
## Acanthurus.auranticavus -0.03512062
## Acanthurus.blochii -0.02705601
## Acanthurus.dussumieri -0.04640333
## Acanthurus.fowleri -0.06197221
How to run a PCA in R:
Simple example with vegan
# vegan uses the rda() function to run a PCA
# RDA stands for redundancy analysis, and it is a constrained ordination
# by leaving an RDA unconstrained (no predictor variables), we can run a PCA
# set scale = TRUE to make sure the data are normalized
trait.pca <- rda(herbivore.data, scale = TRUE)
PCA output
# call the PCA output
trait.pca
## Call: rda(X = herbivore.data, scale = TRUE)
##
## Inertia Rank
## Total 5
## Unconstrained 5 5
## Inertia is correlations
##
## Eigenvalues for unconstrained axes:
## PC1 PC2 PC3 PC4 PC5
## 2.8475 1.1003 0.6356 0.2725 0.1441
# use summary to get more detailed output
summary(trait.pca)
##
## Call:
## rda(X = herbivore.data, scale = TRUE)
##
## Partitioning of correlations:
## Inertia Proportion
## Total 5 1
## Unconstrained 5 1
##
## Eigenvalues, and their contribution to the correlations
##
## Importance of components:
## PC1 PC2 PC3 PC4 PC5
## Eigenvalue 2.8475 1.1003 0.6356 0.2725 0.14407
## Proportion Explained 0.5695 0.2201 0.1271 0.0545 0.02881
## Cumulative Proportion 0.5695 0.7896 0.9167 0.9712 1.00000
##
## Scaling 2 for species and site scores
## * Species are scaled proportional to eigenvalues
## * Sites are unscaled: weighted dispersion equal on all dimensions
## * General scaling constant of scores: 4.631157
##
##
## Species scores
##
## PC1 PC2 PC3 PC4 PC5
## bodydepth -1.050 -1.5373 -0.8116 0.3642 0.1827
## snoutlength -1.452 -1.0370 0.9612 -0.2152 -0.3697
## eyediameter -1.647 0.8518 -0.7641 0.1693 -0.4881
## headlength 1.688 -0.6794 -0.7037 -0.6245 -0.3083
## peduncle 1.856 -0.3067 0.2545 0.7558 -0.3387
##
##
## Site scores (weighted sums of species scores)
##
## PC1 PC2 PC3 PC4 PC5
## Acanthurus.achilles -0.549256 -0.196847 -0.41673 0.826915 -0.26084
## Acanthurus.albipectoralis -0.192537 0.122071 0.17912 -0.036085 0.62505
## Acanthurus.auranticavus -0.324185 -0.232743 0.50241 0.312512 0.43580
## Acanthurus.blochii -0.213814 -0.415571 -0.06857 0.287514 0.48191
## Acanthurus.dussumieri -0.455527 -0.591079 0.02430 0.327667 0.05007
## Acanthurus.fowleri -0.454255 -0.362957 0.59424 0.135611 0.31241
## Acanthurus.guttatus -0.096086 -1.258641 -1.51551 -0.087712 0.59606
## Acanthurus.leucocheilus -0.784262 0.087402 0.09582 0.976815 -0.68697
## Acanthurus.leucopareius -0.390412 -0.642024 -0.32687 0.488518 0.74926
## Acanthurus.lineatus -0.100012 -0.213455 0.34984 0.426566 0.81645
## Acanthurus.maculiceps -0.383387 -0.409522 0.24407 0.061145 -0.45157
## Acanthurus.mata -0.401230 0.234969 0.28977 0.172868 0.22837
## Acanthurus.nigricans -0.386528 -0.345298 -0.30188 0.362352 0.03945
## Acanthurus.nigricauda -0.465632 -0.032885 0.53010 0.485782 -0.30592
## Acanthurus.nigrofuscus -0.091720 0.173699 -0.08377 0.391398 0.18417
## Acanthurus.nigroris -0.347660 0.004722 0.42290 0.593773 0.33824
## Acanthurus.olivaceus -0.322469 0.065212 0.45269 0.360310 0.12960
## Acanthurus.pyroferus -0.310583 -0.150954 0.08954 0.587208 -0.17343
## Acanthurus.thompsoni -0.347337 0.598348 0.16656 0.898583 0.03072
## Acanthurus.triostegus -0.120328 -0.178206 -0.24733 -0.108616 0.30333
## Acanthurus.xanthopterus -0.322311 -0.474877 -0.02990 -0.075403 0.35733
## Bolbometopon.muricatum 0.220941 0.015465 -0.81513 0.241744 -0.40580
## Calotomus.spinidens 0.736584 0.508882 0.41982 0.145244 0.12395
## Cetoscarus.ocellatus 0.515129 -0.477080 -0.29621 -0.796305 -0.98072
## Chlorurus.bleekeri 0.619642 0.091061 0.33170 0.118199 -0.38685
## Chlorurus.bowersi 0.678347 -0.280599 0.26759 -0.235371 0.27302
## Chlorurus.frontalis 0.393237 -0.110726 -0.21260 0.030249 -0.29273
## Chlorurus.japanensis 0.599980 -0.047435 0.32979 0.165712 0.03893
## Chlorurus.microrhinos 0.204752 -0.105734 -0.33058 0.690883 -0.11013
## Chlorurus.spilurus 0.717304 -0.040805 0.35929 -0.089668 -0.53735
## Ctenochaetus.binotatus -0.071096 -0.185757 -0.32754 0.106305 -0.64630
## Ctenochaetus.flavicauda -0.244049 -0.415097 -0.28747 0.500793 -0.24415
## Ctenochaetus.hawaiiensis -0.477079 -0.981251 0.52867 0.244165 -0.04086
## Ctenochaetus.marginatus -0.475728 -0.347915 0.16410 0.723585 0.37866
## Ctenochaetus.striatus -0.125231 -0.524514 0.38987 -0.006667 -0.02281
## Ctenochaetus.strigosus -0.110491 -0.629268 0.20461 0.126077 0.49351
## Ctenochaetus.tominiensis -0.004192 -0.335207 0.67845 0.134942 0.36380
## Hipposcarus.longiceps 0.295972 -0.326860 -0.60504 -0.947791 -0.24853
## Kyphosus.vaigiensis 0.543936 -0.241914 -1.55915 -0.972814 1.33836
## Leptoscarus.vaigiensis 0.733534 0.419722 0.26562 0.060847 -0.26997
## Naso.annulatus -0.671303 0.636315 0.40144 -0.031478 -0.40852
## Naso.brachycentron -0.740326 -0.012775 -0.25981 -1.087854 -0.54543
## Naso.brevirostris -0.654983 0.459308 0.38794 -0.338188 -0.09561
## Naso.hexacanthus -0.582123 0.672470 0.78234 -0.567505 -0.06626
## Naso.lituratus -0.338452 -0.205942 0.51040 -0.938095 -0.04949
## Naso.lopezi -0.642887 0.752491 0.32265 -0.509322 -0.53358
## Naso.thynnoides -0.277299 0.903328 0.67391 -0.819409 0.57055
## Naso.tuberosus -0.600209 0.343675 0.01078 -0.889882 -0.59383
## Naso.unicornis -0.727336 -0.259883 0.46713 -0.712284 0.14285
## Naso.vlamingii -0.784968 0.476269 0.41511 -0.481296 -0.56490
## Paracanthurus.hepatus -0.170708 -0.260372 0.40963 -0.304985 0.15924
## Scarus.altipinnis 0.362909 -0.433734 0.27530 0.096693 -0.55635
## Scarus.chameleon 0.735874 0.023917 0.25569 -0.053399 0.39927
## Scarus.dimidiatus 0.798730 0.076239 0.49850 0.149822 -0.06216
## Scarus.festivus 0.635275 0.044563 -0.18677 0.189500 0.02563
## Scarus.flavipectoralis 0.761533 0.219163 0.44764 -0.033140 0.53360
## Scarus.forsteni 0.557740 0.112582 0.14137 0.149350 0.06873
## Scarus.frenatus 0.710884 0.031198 0.23501 0.278360 -0.28597
## Scarus.ghobban 0.320318 -0.255042 -0.44972 -0.407212 -0.31276
## Scarus.globiceps 0.670007 -0.006700 0.22356 0.250030 -0.05515
## Scarus.hypselopterus 0.765772 0.352374 0.47912 0.265259 0.34841
## Scarus.longipinnis 0.382589 0.374147 0.24181 0.696125 -0.16082
## Scarus.niger 0.575313 -0.138538 -0.12514 0.287207 -0.19853
## Scarus.oviceps 0.787324 -0.010247 0.37864 -0.043355 -0.53162
## Scarus.prasiognathos 0.530023 -0.083433 0.10007 0.193815 0.15194
## Scarus.psittacus 0.679511 -0.194834 0.08906 0.092821 -0.04585
## Scarus.quoyi 0.569658 0.033128 -0.08175 0.055416 0.03527
## Scarus.rivulatus 0.505842 -0.172985 -0.13776 0.728452 0.21801
## Scarus.rubroviolaceus 0.348523 0.044175 -0.43462 -0.099810 -0.86521
## Scarus.schlegeli 0.706009 -0.069593 -0.06508 -0.179623 0.01703
## Scarus.spinus 0.578802 -0.064642 0.07582 0.346024 -0.54801
## Scarus.tricolor 0.657465 0.174872 0.22802 0.158247 0.01095
## Scarus.xanthopleura 0.463790 -0.088127 -0.45253 -0.403154 -0.79054
## Siganus.argenteus -0.146839 0.775781 0.35105 -0.523297 1.41224
## Siganus.canaliculatus 0.006319 0.891053 0.23832 -0.480920 0.62514
## Siganus.corallinus -0.080446 0.014277 -0.30966 -0.747394 0.25946
## Siganus.doliatus -0.123483 0.358985 -0.75453 -0.166010 0.41527
## Siganus.fuscescens -0.147512 0.791480 -0.30559 -0.184150 0.26082
## Siganus.guttatus -0.470678 0.617754 -1.56597 0.639240 -0.32156
## Siganus.javus -0.093929 0.249917 -0.57795 -0.362118 0.71322
## Siganus.lineatus -0.148844 0.383187 -0.70603 -0.197787 0.10004
## Siganus.niger -0.249042 0.298231 -0.43268 -0.475022 -0.55285
## Siganus.puellus 0.054996 0.429036 -0.11193 -0.771388 -0.05215
## Siganus.punctatissimus -0.479031 0.650957 -0.98195 0.597733 -0.13505
## Siganus.punctatus -0.302639 0.366550 -0.25728 0.100289 0.56360
## Siganus.randalli -0.098522 1.047577 -0.77982 0.130004 -1.27195
## Siganus.spinus -0.052700 1.388201 -0.02476 -0.082678 -0.03378
## Siganus.stellatus -0.287467 0.075813 -0.10246 0.096600 1.12423
## Siganus.vermiculatus -0.016556 0.041327 -0.67886 -0.307382 0.40666
## Zebrasoma.flavescens -0.649717 -0.639305 -0.13111 0.138910 -0.59445
## Zebrasoma.rostratum -0.094915 -1.611350 0.81659 -1.449383 -0.63718
## Zebrasoma.scopas -0.369763 -1.103403 -0.20763 -0.361144 0.13989
## Zebrasoma.velifer -0.854489 -0.265766 0.20787 0.740915 -0.45596
Interpretation of output
Use base R to visualize the PCA
# use biplot to make a PCA plot with base R
# each point is a fish species, and they are the species scores
# each vector is a fish trait, and are called loadings
biplot(trait.pca)
Use the screeplot() function to show the relative importance of each component
screeplot(trait.pca)
Add information to improve the base R plot
# remember that "species" refers to fish traits and "sites" refers to fish species
biplot(trait.pca,
display = c("sites", "species"),
type = c("text", "text"),
col = c("black", "grey"))
Use metadata to further improve the plot
# make the basic plot, without all of the fish species names
biplot(trait.pca,
type = c("text", "points"),
col = c("black", "grey"))
# visualize groupings among fish families
# add convex hulls around all species (points) within a fish family
ordihull(trait.pca,
group = herbivore.meta$family,
col = c("blue", "goldenrod2", "forestgreen", "orchid"))
# add a legend
# note that Kyphosidae is not visible on the plot because a convex hull cannot be drawn around only one species
fam.names <- sort(unique(herbivore.meta$family))
legend("topright",
col = c("blue", "goldenrod2", "forestgreen", "orchid"),
lty = 1,
legend = fam.names)
To use ggplot, the PCA data must be extracted. It is not automated like in base R.
Retrieve the site scores
# first create an object with all the PCA data
pca.summary <- summary(trait.pca)
# extract the "sites" - fish species
# use inner_join to merge the site scores with the metadata
sp.scores <- as.data.frame(pca.summary$sites) %>%
mutate(gen.spe = rownames(.)) %>%
inner_join(herbivore.meta)
## Joining, by = "gen.spe"
head(sp.scores)
## PC1 PC2 PC3 PC4 PC5
## 1 -0.5492562 -0.1968472 -0.4167308 0.82691471 -0.26084223
## 2 -0.1925365 0.1220711 0.1791163 -0.03608451 0.62505315
## 3 -0.3241855 -0.2327432 0.5024143 0.31251181 0.43579767
## 4 -0.2138137 -0.4155709 -0.0685711 0.28751377 0.48191286
## 5 -0.4555274 -0.5910787 0.0242970 0.32766748 0.05006746
## 6 -0.4542550 -0.3629571 0.5942380 0.13561081 0.31241064
## gen.spe family genus species
## 1 Acanthurus.achilles Acanthuridae Acanthurus achilles
## 2 Acanthurus.albipectoralis Acanthuridae Acanthurus albipectoralis
## 3 Acanthurus.auranticavus Acanthuridae Acanthurus auranticavus
## 4 Acanthurus.blochii Acanthuridae Acanthurus blochii
## 5 Acanthurus.dussumieri Acanthuridae Acanthurus dussumieri
## 6 Acanthurus.fowleri Acanthuridae Acanthurus fowleri
Retrieve the loadings
# extract the "species" - fish traits
trait.loadings <- as.data.frame(pca.summary$species) %>%
mutate(trait = rownames(.))
head(trait.loadings)
## PC1 PC2 PC3 PC4 PC5 trait
## bodydepth -1.049594 -1.5372541 -0.8115897 0.3642146 0.1827414 bodydepth
## snoutlength -1.451651 -1.0370060 0.9611858 -0.2151709 -0.3696923 snoutlength
## eyediameter -1.647169 0.8518456 -0.7640840 0.1693478 -0.4880713 eyediameter
## headlength 1.687522 -0.6793976 -0.7036693 -0.6245244 -0.3082686 headlength
## peduncle 1.856005 -0.3066973 0.2544979 0.7557992 -0.3386767 peduncle
Visualize PCA with ggplot
# use fishualize as color palette
# color points by fish family
pca.ggplot <- ggplot(data = sp.scores, aes(x = PC1, y = PC2)) +
geom_point(aes(color = family)) +
theme_bw() +
scale_color_fish_d(option = "Acanthurus_sohal")
pca.ggplot
Add loadings to plot
# use geom_segment() to draw segments for loadings
# use geom_text() to add labels
# various other aesthetics to limit axes and position the legend at the top of the plot
# include the variance explained on the x-axis and y-axis
pca.ggplot.loadings <- ggplot(data = sp.scores, aes(x = PC1, y = PC2)) +
geom_point(aes(color = family), size = 2.5) +
geom_segment(data = trait.loadings, aes(x = 0, xend = PC1/2, y = 0, yend = PC2/2), lwd = 0.1) +
geom_text(data = trait.loadings, aes(x = PC1/2, y = PC2/2+0.1, label = trait), size = 3) +
theme_bw() +
scale_color_fish_d(option = "Acanthurus_sohal") +
scale_x_continuous(limits = c(-1,1)) +
theme(legend.position = "top") +
xlab("PC1 (57.0%)") +
ylab("PC2 (22.0%)")
pca.ggplot.loadings
Add convex hulls to delineate families
# create convex hulls grouped by family based on site scores
# use the slice() and the chull() functions to calculate each family's convex hull
family.hulls <- sp.scores %>%
group_by(family) %>%
slice(chull(PC1, PC2))
# add the family convex hulls to the PCA
pca.plot.loadings.hulls <- pca.ggplot.loadings +
geom_polygon(data = family.hulls, aes(x = PC1, y = PC2, fill = family), alpha = 0.4, show.legend = F)
pca.plot.loadings.hulls