1 Packages and data

# install the vegan package by running install.packages()
# the vegan package facilitates multivariate analyses
install.packages("vegan", repos = "http://cran.us.r-project.org")
# load the tidyverse package in your current R session
library(tidyverse)

# load the ggplot2 package in your current R session
library(ggplot2)

# load the fishualize package in your current R session
library(fishualize)

# load the vegan package in your current R session
library(vegan)

Read in datasets to use with community analysis

# morphology of herbivorous coral reef fishes, relative to standard length
herbivore.morphology <- read.csv(file = "data/herbivore_morphology.csv")
head(herbivore.morphology)
##         family      genus        species                   gen.spe   bodydepth
## 1 Acanthuridae Acanthurus       achilles       Acanthurus.achilles 0.098504108
## 2 Acanthuridae Acanthurus albipectoralis Acanthurus.albipectoralis 0.001397261
## 3 Acanthuridae Acanthurus   auranticavus   Acanthurus.auranticavus 0.034632323
## 4 Acanthuridae Acanthurus        blochii        Acanthurus.blochii 0.075265132
## 5 Acanthuridae Acanthurus     dussumieri     Acanthurus.dussumieri 0.100102812
## 6 Acanthuridae Acanthurus        fowleri        Acanthurus.fowleri 0.045968691
##   snoutlength  eyediameter headlength    peduncle
## 1   0.4877797  0.069172106  0.2315766 -0.03889361
## 2   0.4402623  0.000622937  0.2405350 -0.06240037
## 3   0.5386490 -0.008570787  0.2263199 -0.03512062
## 4   0.4782217 -0.004914155  0.2538896 -0.02705601
## 5   0.5661867  0.015159797  0.2443877 -0.04640333
## 6   0.5950563 -0.005349450  0.2244994 -0.06197221

2 Principal Components Analysis (PCA)

Ecological data is complex, oftentimes including many variables. A principal components analysis (PCA) is useful to interpret complex datasets because it reduces dimensionality but preserves variability, allowing for easier interpretation of overall trends. In other words, it reduces many variables down to a few principal components.

2.1 Data normalization

First, variables must be standardized to compare across variables. Without standardizing the variables on the same scale, the PCA will assume that one variable is more important than the other.

Calculate the means across the columns to determine whether the variables need to be standardized.

# calculate the means of the columns
colMeans(herbivore.morphology[5:9])
##    bodydepth  snoutlength  eyediameter   headlength     peduncle 
##  0.003329304  0.428684939  0.001989914  0.267733093 -0.003910865

Data normalization can be achieved with Z score standardization, which subtracts the mean of the column from each value and then divides it by the standard deviation. This results in a dataset with a mean of 0 and a standard deviation of 1, without changing the relative differences between the rows in the dataset.

Demonstration of manual data normalization

# use columns body depth and snout length
herb.practice <- herbivore.morphology %>%
  select(bodydepth, snoutlength) %>%
  mutate(bd.zscore = (bodydepth - mean(bodydepth))/sd(bodydepth),
         sl.zscore = (snoutlength - mean(snoutlength))/sd(snoutlength))
head(herb.practice)
##     bodydepth snoutlength   bd.zscore sl.zscore
## 1 0.098504108   0.4877797  1.47082227 0.5194079
## 2 0.001397261   0.4402623 -0.02985761 0.1017584
## 3 0.034632323   0.5386490  0.48375384 0.9665194
## 4 0.075265132   0.4782217  1.11168937 0.4353988
## 5 0.100102812   0.5661867  1.49552849 1.2085590
## 6 0.045968691   0.5950563  0.65894499 1.4623064
# check mean and standard deviation of body depth and snout length
# means = 0 and standard deviations = 1
practice.means <- herb.practice %>%
  summarize(mean.zbd = mean(bd.zscore),
            mean.zsl = mean(sl.zscore),
            sd.zbd = sd(bd.zscore),
            sd.zsl = sd(sl.zscore))
practice.means
##        mean.zbd     mean.zsl sd.zbd sd.zsl
## 1 -2.520945e-17 2.095355e-16      1      1

Z score standardization can also be achieved with the scale() function.

Demonstration of scale() function on a vector

# use scale() on body depth
# be aware that scale() turns output into a matrix, which can be turned back into a vector with as.vector()
scaled.bd <- scale(herbivore.morphology$bodydepth)
head(scaled.bd)
##             [,1]
## [1,]  1.47082227
## [2,] -0.02985761
## [3,]  0.48375384
## [4,]  1.11168937
## [5,]  1.49552849
## [6,]  0.65894499
# mean of standardized body depth = 0
mean(scaled.bd)
## [1] -2.405646e-17
# standard deviation of standardized body depth = 1
sd(scaled.bd)
## [1] 1

Applying the scale() function across a dataset with tidyverse

# use scale() across multiple columns with across() function in mutate()
herb.scaled <- herbivore.morphology %>%
  mutate(across(where(is.numeric), ~scale(.)))

# check that mean = 0 and standard deviation = 1 for one variable - body depth
mean(herb.scaled$bodydepth)
## [1] -2.405646e-17
sd(herb.scaled$bodydepth)
## [1] 1
# use gather() across columns
herb.scaled.long <- herb.scaled %>%
  gather(5:9, key = "trait", value = "value")
head(herb.scaled.long)
##         family      genus        species                   gen.spe     trait
## 1 Acanthuridae Acanthurus       achilles       Acanthurus.achilles bodydepth
## 2 Acanthuridae Acanthurus albipectoralis Acanthurus.albipectoralis bodydepth
## 3 Acanthuridae Acanthurus   auranticavus   Acanthurus.auranticavus bodydepth
## 4 Acanthuridae Acanthurus        blochii        Acanthurus.blochii bodydepth
## 5 Acanthuridae Acanthurus     dussumieri     Acanthurus.dussumieri bodydepth
## 6 Acanthuridae Acanthurus        fowleri        Acanthurus.fowleri bodydepth
##         value
## 1  1.47082227
## 2 -0.02985761
## 3  0.48375384
## 4  1.11168937
## 5  1.49552849
## 6  0.65894499
# summarize means and standard deviations across all variables
herb.scaled.summary <- herb.scaled.long %>%
  group_by(trait) %>%
  summarize(mean = mean(value),
            sd = sd(value))
head(herb.scaled.summary)
## # A tibble: 5 × 3
##   trait            mean    sd
##   <chr>           <dbl> <dbl>
## 1 bodydepth   -2.41e-17     1
## 2 eyediameter -1.52e-17     1
## 3 headlength  -4.80e-16     1
## 4 peduncle    -5.90e-18     1
## 5 snoutlength  2.10e-16     1
# use ggplot to visualize means and standard deviations of all variables
# use fishualize for color palette
trait.means <- ggplot(herb.scaled.summary, aes(x = trait, y = mean, color = trait)) +
  geom_pointrange(aes(ymin = mean-sd, ymax = mean+sd)) +
  geom_jitter(data = herb.scaled.long, aes(x = trait, y = value, color = trait), alpha = 0.2) +
  theme_bw() +
  scale_color_fish_d(option = "Acanthurus_sohal")
trait.means

2.2 Prepare dataset

The vegan package has an embedded function for data normalization, so it’s typically not necessary to perform the above data normalization.

Unlike most coding practices, which require data in long format, to perform a PCA, data need to be in a wide format. Data can be reformatted using the spread() and gather() functions.

Confirm that data are in wide format

head(herbivore.morphology)
##         family      genus        species                   gen.spe   bodydepth
## 1 Acanthuridae Acanthurus       achilles       Acanthurus.achilles 0.098504108
## 2 Acanthuridae Acanthurus albipectoralis Acanthurus.albipectoralis 0.001397261
## 3 Acanthuridae Acanthurus   auranticavus   Acanthurus.auranticavus 0.034632323
## 4 Acanthuridae Acanthurus        blochii        Acanthurus.blochii 0.075265132
## 5 Acanthuridae Acanthurus     dussumieri     Acanthurus.dussumieri 0.100102812
## 6 Acanthuridae Acanthurus        fowleri        Acanthurus.fowleri 0.045968691
##   snoutlength  eyediameter headlength    peduncle
## 1   0.4877797  0.069172106  0.2315766 -0.03889361
## 2   0.4402623  0.000622937  0.2405350 -0.06240037
## 3   0.5386490 -0.008570787  0.2263199 -0.03512062
## 4   0.4782217 -0.004914155  0.2538896 -0.02705601
## 5   0.5661867  0.015159797  0.2443877 -0.04640333
## 6   0.5950563 -0.005349450  0.2244994 -0.06197221
str(herbivore.morphology)
## 'data.frame':    93 obs. of  9 variables:
##  $ family     : chr  "Acanthuridae" "Acanthuridae" "Acanthuridae" "Acanthuridae" ...
##  $ genus      : chr  "Acanthurus" "Acanthurus" "Acanthurus" "Acanthurus" ...
##  $ species    : chr  "achilles" "albipectoralis" "auranticavus" "blochii" ...
##  $ gen.spe    : chr  "Acanthurus.achilles" "Acanthurus.albipectoralis" "Acanthurus.auranticavus" "Acanthurus.blochii" ...
##  $ bodydepth  : num  0.0985 0.0014 0.0346 0.0753 0.1001 ...
##  $ snoutlength: num  0.488 0.44 0.539 0.478 0.566 ...
##  $ eyediameter: num  0.069172 0.000623 -0.008571 -0.004914 0.01516 ...
##  $ headlength : num  0.232 0.241 0.226 0.254 0.244 ...
##  $ peduncle   : num  -0.0389 -0.0624 -0.0351 -0.0271 -0.0464 ...

The dataset includes a few columns with metadata for each species (family, genus, and species). Multivariate functions can only be performed on numeric data, so the character data must be isolated.

Set row names to keep track of which row belongs to which species

herbivore.meta <- herbivore.morphology[1:4]
herbivore.data <- herbivore.morphology[5:9]
rownames(herbivore.data) <- herbivore.meta$gen.spe
head(herbivore.data)
##                             bodydepth snoutlength  eyediameter headlength
## Acanthurus.achilles       0.098504108   0.4877797  0.069172106  0.2315766
## Acanthurus.albipectoralis 0.001397261   0.4402623  0.000622937  0.2405350
## Acanthurus.auranticavus   0.034632323   0.5386490 -0.008570787  0.2263199
## Acanthurus.blochii        0.075265132   0.4782217 -0.004914155  0.2538896
## Acanthurus.dussumieri     0.100102812   0.5661867  0.015159797  0.2443877
## Acanthurus.fowleri        0.045968691   0.5950563 -0.005349450  0.2244994
##                              peduncle
## Acanthurus.achilles       -0.03889361
## Acanthurus.albipectoralis -0.06240037
## Acanthurus.auranticavus   -0.03512062
## Acanthurus.blochii        -0.02705601
## Acanthurus.dussumieri     -0.04640333
## Acanthurus.fowleri        -0.06197221

2.3 PCA Example

How to run a PCA in R:

  1. Base R: prcomp(), princomp()
  2. Package vegan: rda()
  3. Package ade4: dudi.pca()

Simple example with vegan

# vegan uses the rda() function to run a PCA
# RDA stands for redundancy analysis, and it is a constrained ordination
# by leaving an RDA unconstrained (no predictor variables), we can run a PCA
# set scale = TRUE to make sure the data are normalized
trait.pca <- rda(herbivore.data, scale = TRUE)

PCA output

# call the PCA output
trait.pca
## Call: rda(X = herbivore.data, scale = TRUE)
## 
##               Inertia Rank
## Total               5     
## Unconstrained       5    5
## Inertia is correlations 
## 
## Eigenvalues for unconstrained axes:
##    PC1    PC2    PC3    PC4    PC5 
## 2.8475 1.1003 0.6356 0.2725 0.1441
# use summary to get more detailed output
summary(trait.pca)
## 
## Call:
## rda(X = herbivore.data, scale = TRUE) 
## 
## Partitioning of correlations:
##               Inertia Proportion
## Total               5          1
## Unconstrained       5          1
## 
## Eigenvalues, and their contribution to the correlations 
## 
## Importance of components:
##                          PC1    PC2    PC3    PC4     PC5
## Eigenvalue            2.8475 1.1003 0.6356 0.2725 0.14407
## Proportion Explained  0.5695 0.2201 0.1271 0.0545 0.02881
## Cumulative Proportion 0.5695 0.7896 0.9167 0.9712 1.00000
## 
## Scaling 2 for species and site scores
## * Species are scaled proportional to eigenvalues
## * Sites are unscaled: weighted dispersion equal on all dimensions
## * General scaling constant of scores:  4.631157 
## 
## 
## Species scores
## 
##                PC1     PC2     PC3     PC4     PC5
## bodydepth   -1.050 -1.5373 -0.8116  0.3642  0.1827
## snoutlength -1.452 -1.0370  0.9612 -0.2152 -0.3697
## eyediameter -1.647  0.8518 -0.7641  0.1693 -0.4881
## headlength   1.688 -0.6794 -0.7037 -0.6245 -0.3083
## peduncle     1.856 -0.3067  0.2545  0.7558 -0.3387
## 
## 
## Site scores (weighted sums of species scores)
## 
##                                 PC1       PC2      PC3       PC4      PC5
## Acanthurus.achilles       -0.549256 -0.196847 -0.41673  0.826915 -0.26084
## Acanthurus.albipectoralis -0.192537  0.122071  0.17912 -0.036085  0.62505
## Acanthurus.auranticavus   -0.324185 -0.232743  0.50241  0.312512  0.43580
## Acanthurus.blochii        -0.213814 -0.415571 -0.06857  0.287514  0.48191
## Acanthurus.dussumieri     -0.455527 -0.591079  0.02430  0.327667  0.05007
## Acanthurus.fowleri        -0.454255 -0.362957  0.59424  0.135611  0.31241
## Acanthurus.guttatus       -0.096086 -1.258641 -1.51551 -0.087712  0.59606
## Acanthurus.leucocheilus   -0.784262  0.087402  0.09582  0.976815 -0.68697
## Acanthurus.leucopareius   -0.390412 -0.642024 -0.32687  0.488518  0.74926
## Acanthurus.lineatus       -0.100012 -0.213455  0.34984  0.426566  0.81645
## Acanthurus.maculiceps     -0.383387 -0.409522  0.24407  0.061145 -0.45157
## Acanthurus.mata           -0.401230  0.234969  0.28977  0.172868  0.22837
## Acanthurus.nigricans      -0.386528 -0.345298 -0.30188  0.362352  0.03945
## Acanthurus.nigricauda     -0.465632 -0.032885  0.53010  0.485782 -0.30592
## Acanthurus.nigrofuscus    -0.091720  0.173699 -0.08377  0.391398  0.18417
## Acanthurus.nigroris       -0.347660  0.004722  0.42290  0.593773  0.33824
## Acanthurus.olivaceus      -0.322469  0.065212  0.45269  0.360310  0.12960
## Acanthurus.pyroferus      -0.310583 -0.150954  0.08954  0.587208 -0.17343
## Acanthurus.thompsoni      -0.347337  0.598348  0.16656  0.898583  0.03072
## Acanthurus.triostegus     -0.120328 -0.178206 -0.24733 -0.108616  0.30333
## Acanthurus.xanthopterus   -0.322311 -0.474877 -0.02990 -0.075403  0.35733
## Bolbometopon.muricatum     0.220941  0.015465 -0.81513  0.241744 -0.40580
## Calotomus.spinidens        0.736584  0.508882  0.41982  0.145244  0.12395
## Cetoscarus.ocellatus       0.515129 -0.477080 -0.29621 -0.796305 -0.98072
## Chlorurus.bleekeri         0.619642  0.091061  0.33170  0.118199 -0.38685
## Chlorurus.bowersi          0.678347 -0.280599  0.26759 -0.235371  0.27302
## Chlorurus.frontalis        0.393237 -0.110726 -0.21260  0.030249 -0.29273
## Chlorurus.japanensis       0.599980 -0.047435  0.32979  0.165712  0.03893
## Chlorurus.microrhinos      0.204752 -0.105734 -0.33058  0.690883 -0.11013
## Chlorurus.spilurus         0.717304 -0.040805  0.35929 -0.089668 -0.53735
## Ctenochaetus.binotatus    -0.071096 -0.185757 -0.32754  0.106305 -0.64630
## Ctenochaetus.flavicauda   -0.244049 -0.415097 -0.28747  0.500793 -0.24415
## Ctenochaetus.hawaiiensis  -0.477079 -0.981251  0.52867  0.244165 -0.04086
## Ctenochaetus.marginatus   -0.475728 -0.347915  0.16410  0.723585  0.37866
## Ctenochaetus.striatus     -0.125231 -0.524514  0.38987 -0.006667 -0.02281
## Ctenochaetus.strigosus    -0.110491 -0.629268  0.20461  0.126077  0.49351
## Ctenochaetus.tominiensis  -0.004192 -0.335207  0.67845  0.134942  0.36380
## Hipposcarus.longiceps      0.295972 -0.326860 -0.60504 -0.947791 -0.24853
## Kyphosus.vaigiensis        0.543936 -0.241914 -1.55915 -0.972814  1.33836
## Leptoscarus.vaigiensis     0.733534  0.419722  0.26562  0.060847 -0.26997
## Naso.annulatus            -0.671303  0.636315  0.40144 -0.031478 -0.40852
## Naso.brachycentron        -0.740326 -0.012775 -0.25981 -1.087854 -0.54543
## Naso.brevirostris         -0.654983  0.459308  0.38794 -0.338188 -0.09561
## Naso.hexacanthus          -0.582123  0.672470  0.78234 -0.567505 -0.06626
## Naso.lituratus            -0.338452 -0.205942  0.51040 -0.938095 -0.04949
## Naso.lopezi               -0.642887  0.752491  0.32265 -0.509322 -0.53358
## Naso.thynnoides           -0.277299  0.903328  0.67391 -0.819409  0.57055
## Naso.tuberosus            -0.600209  0.343675  0.01078 -0.889882 -0.59383
## Naso.unicornis            -0.727336 -0.259883  0.46713 -0.712284  0.14285
## Naso.vlamingii            -0.784968  0.476269  0.41511 -0.481296 -0.56490
## Paracanthurus.hepatus     -0.170708 -0.260372  0.40963 -0.304985  0.15924
## Scarus.altipinnis          0.362909 -0.433734  0.27530  0.096693 -0.55635
## Scarus.chameleon           0.735874  0.023917  0.25569 -0.053399  0.39927
## Scarus.dimidiatus          0.798730  0.076239  0.49850  0.149822 -0.06216
## Scarus.festivus            0.635275  0.044563 -0.18677  0.189500  0.02563
## Scarus.flavipectoralis     0.761533  0.219163  0.44764 -0.033140  0.53360
## Scarus.forsteni            0.557740  0.112582  0.14137  0.149350  0.06873
## Scarus.frenatus            0.710884  0.031198  0.23501  0.278360 -0.28597
## Scarus.ghobban             0.320318 -0.255042 -0.44972 -0.407212 -0.31276
## Scarus.globiceps           0.670007 -0.006700  0.22356  0.250030 -0.05515
## Scarus.hypselopterus       0.765772  0.352374  0.47912  0.265259  0.34841
## Scarus.longipinnis         0.382589  0.374147  0.24181  0.696125 -0.16082
## Scarus.niger               0.575313 -0.138538 -0.12514  0.287207 -0.19853
## Scarus.oviceps             0.787324 -0.010247  0.37864 -0.043355 -0.53162
## Scarus.prasiognathos       0.530023 -0.083433  0.10007  0.193815  0.15194
## Scarus.psittacus           0.679511 -0.194834  0.08906  0.092821 -0.04585
## Scarus.quoyi               0.569658  0.033128 -0.08175  0.055416  0.03527
## Scarus.rivulatus           0.505842 -0.172985 -0.13776  0.728452  0.21801
## Scarus.rubroviolaceus      0.348523  0.044175 -0.43462 -0.099810 -0.86521
## Scarus.schlegeli           0.706009 -0.069593 -0.06508 -0.179623  0.01703
## Scarus.spinus              0.578802 -0.064642  0.07582  0.346024 -0.54801
## Scarus.tricolor            0.657465  0.174872  0.22802  0.158247  0.01095
## Scarus.xanthopleura        0.463790 -0.088127 -0.45253 -0.403154 -0.79054
## Siganus.argenteus         -0.146839  0.775781  0.35105 -0.523297  1.41224
## Siganus.canaliculatus      0.006319  0.891053  0.23832 -0.480920  0.62514
## Siganus.corallinus        -0.080446  0.014277 -0.30966 -0.747394  0.25946
## Siganus.doliatus          -0.123483  0.358985 -0.75453 -0.166010  0.41527
## Siganus.fuscescens        -0.147512  0.791480 -0.30559 -0.184150  0.26082
## Siganus.guttatus          -0.470678  0.617754 -1.56597  0.639240 -0.32156
## Siganus.javus             -0.093929  0.249917 -0.57795 -0.362118  0.71322
## Siganus.lineatus          -0.148844  0.383187 -0.70603 -0.197787  0.10004
## Siganus.niger             -0.249042  0.298231 -0.43268 -0.475022 -0.55285
## Siganus.puellus            0.054996  0.429036 -0.11193 -0.771388 -0.05215
## Siganus.punctatissimus    -0.479031  0.650957 -0.98195  0.597733 -0.13505
## Siganus.punctatus         -0.302639  0.366550 -0.25728  0.100289  0.56360
## Siganus.randalli          -0.098522  1.047577 -0.77982  0.130004 -1.27195
## Siganus.spinus            -0.052700  1.388201 -0.02476 -0.082678 -0.03378
## Siganus.stellatus         -0.287467  0.075813 -0.10246  0.096600  1.12423
## Siganus.vermiculatus      -0.016556  0.041327 -0.67886 -0.307382  0.40666
## Zebrasoma.flavescens      -0.649717 -0.639305 -0.13111  0.138910 -0.59445
## Zebrasoma.rostratum       -0.094915 -1.611350  0.81659 -1.449383 -0.63718
## Zebrasoma.scopas          -0.369763 -1.103403 -0.20763 -0.361144  0.13989
## Zebrasoma.velifer         -0.854489 -0.265766  0.20787  0.740915 -0.45596

Interpretation of output

  • Summary: Call summarizes what was run.
  • Unconstrained inertia: R provides a table that summarizes total inertia, how much is unconstrained, and their proportional representation. A PCA does not have predictor variables, so all of the inertia is unconstrained.
  • Eigenvalues: R provides eigenvalues and their relative contributions. These values represent whether the PCA adequately captures variation in the data. Focus on the proportion explained. PC1 explains nearly 57% of the data, and PC2 explains 22% of the data. Together, they capture 79% of the variation in the data. This is good!
  • Species scores: Species is a default descriptor in vegan. In our case, it refers to fish traits.
  • Site scores: Site is another default descriptor in vegan. In our case, it refers to fish species.

3 Visualize PCA

3.1 Base R

Use base R to visualize the PCA

# use biplot to make a PCA plot with base R
# each point is a fish species, and they are the species scores 
# each vector is a fish trait, and are called loadings
biplot(trait.pca)

Use the screeplot() function to show the relative importance of each component

screeplot(trait.pca)

Add information to improve the base R plot

# remember that "species" refers to fish traits and "sites" refers to fish species
biplot(trait.pca,
       display = c("sites", "species"),
       type = c("text", "text"),
       col = c("black", "grey"))

Use metadata to further improve the plot

# make the basic plot, without all of the fish species names
biplot(trait.pca,
       type = c("text", "points"),
       col = c("black", "grey"))

# visualize groupings among fish families
# add convex hulls around all species (points) within a fish family
ordihull(trait.pca,
         group = herbivore.meta$family,
         col = c("blue", "goldenrod2", "forestgreen", "orchid"))

# add a legend
# note that Kyphosidae is not visible on the plot because a convex hull cannot be drawn around only one species
fam.names <- sort(unique(herbivore.meta$family))
legend("topright",
       col = c("blue", "goldenrod2", "forestgreen", "orchid"), 
       lty = 1,
       legend = fam.names)

3.2 ggplot2

To use ggplot, the PCA data must be extracted. It is not automated like in base R.

Retrieve the site scores

# first create an object with all the PCA data
pca.summary <- summary(trait.pca)

# extract the "sites" - fish species
# use inner_join to merge the site scores with the metadata
sp.scores <- as.data.frame(pca.summary$sites) %>%
  mutate(gen.spe = rownames(.)) %>%
  inner_join(herbivore.meta)
## Joining, by = "gen.spe"
head(sp.scores)
##          PC1        PC2        PC3         PC4         PC5
## 1 -0.5492562 -0.1968472 -0.4167308  0.82691471 -0.26084223
## 2 -0.1925365  0.1220711  0.1791163 -0.03608451  0.62505315
## 3 -0.3241855 -0.2327432  0.5024143  0.31251181  0.43579767
## 4 -0.2138137 -0.4155709 -0.0685711  0.28751377  0.48191286
## 5 -0.4555274 -0.5910787  0.0242970  0.32766748  0.05006746
## 6 -0.4542550 -0.3629571  0.5942380  0.13561081  0.31241064
##                     gen.spe       family      genus        species
## 1       Acanthurus.achilles Acanthuridae Acanthurus       achilles
## 2 Acanthurus.albipectoralis Acanthuridae Acanthurus albipectoralis
## 3   Acanthurus.auranticavus Acanthuridae Acanthurus   auranticavus
## 4        Acanthurus.blochii Acanthuridae Acanthurus        blochii
## 5     Acanthurus.dussumieri Acanthuridae Acanthurus     dussumieri
## 6        Acanthurus.fowleri Acanthuridae Acanthurus        fowleri

Retrieve the loadings

# extract the "species" - fish traits
trait.loadings <- as.data.frame(pca.summary$species) %>%
  mutate(trait = rownames(.))
head(trait.loadings)
##                   PC1        PC2        PC3        PC4        PC5       trait
## bodydepth   -1.049594 -1.5372541 -0.8115897  0.3642146  0.1827414   bodydepth
## snoutlength -1.451651 -1.0370060  0.9611858 -0.2151709 -0.3696923 snoutlength
## eyediameter -1.647169  0.8518456 -0.7640840  0.1693478 -0.4880713 eyediameter
## headlength   1.687522 -0.6793976 -0.7036693 -0.6245244 -0.3082686  headlength
## peduncle     1.856005 -0.3066973  0.2544979  0.7557992 -0.3386767    peduncle

Visualize PCA with ggplot

# use fishualize as color palette
# color points by fish family
pca.ggplot <- ggplot(data = sp.scores, aes(x = PC1, y = PC2)) +
  geom_point(aes(color = family)) +
  theme_bw() +
  scale_color_fish_d(option = "Acanthurus_sohal")
pca.ggplot

Add loadings to plot

# use geom_segment() to draw segments for loadings
# use geom_text() to add labels
# various other aesthetics to limit axes and position the legend at the top of the plot
# include the variance explained on the x-axis and y-axis
pca.ggplot.loadings <- ggplot(data = sp.scores, aes(x = PC1, y = PC2)) +  
  geom_point(aes(color = family), size = 2.5) + 
  geom_segment(data = trait.loadings, aes(x = 0, xend = PC1/2, y = 0, yend = PC2/2), lwd = 0.1) +
  geom_text(data = trait.loadings, aes(x = PC1/2, y = PC2/2+0.1, label = trait), size = 3) +
  theme_bw() +
  scale_color_fish_d(option = "Acanthurus_sohal") +
  scale_x_continuous(limits = c(-1,1)) +
  theme(legend.position = "top") +
  xlab("PC1 (57.0%)") + 
  ylab("PC2 (22.0%)")
pca.ggplot.loadings

Add convex hulls to delineate families

# create convex hulls grouped by family based on site scores
# use the slice() and the chull() functions to calculate each family's convex hull
family.hulls <- sp.scores %>%
  group_by(family) %>%
  slice(chull(PC1, PC2))

# add the family convex hulls to the PCA
pca.plot.loadings.hulls <- pca.ggplot.loadings +
  geom_polygon(data = family.hulls, aes(x = PC1, y = PC2, fill = family), alpha = 0.4, show.legend = F)
pca.plot.loadings.hulls