Scaling data

  1. \(x_{ij}\) untransformed data
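  2. \(x_{ij} - \bar{x}_j\) centered data
  3. \(\frac{x_{ij} - \bar{x}_j}{s_j}\) standardized data
  4. \(\ln x_{ij}\) log-transformed data
  5. \(\ln \frac{x_{ij}}{x_{ip}}\) for \(j = 1, 2, \dots, p-1\) log-ratio transformed data (additive log-ratio, with the \(p\)-th component as the common divisor)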
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(compositions)
## Welcome to compositions, a package for compositional data analysis.
## Find an intro with "? compositions"
## 
## Attaching package: 'compositions'
## The following objects are masked from 'package:stats':
## 
##     cor, cov, dist, var
## The following objects are masked from 'package:base':
## 
##     %*%, norm, scale, scale.default
# Workbook with the compositional measurements, located via here::here().
path_compo <- here::here("data/NEMCICE_COMPO_FINAL.xlsx")

# Read the "Nemcice_PCA" sheet, normalise the column names with
# janitor::clean_names(), and keep the descriptive columns plus the
# measurement columns fe through bi.
compo <- select(
  janitor::clean_names(
    readxl::read_excel(path_compo, sheet = "Nemcice_PCA")
  ),
  id, site, starts_with("type"), pozn, dating, fe:bi
)

# Subset of element columns; only used by the commented-out alternative below.
elements <- c("co", "ni", "zn", "as", "ag", "sb", "bi")

# Numeric matrix used by the PCA calls: every column from fe to bi.
# Alternative restricted to the trace elements listed in `elements`:
# mat_compo <- as.matrix(select(compo, all_of(elements)))
mat_compo <- compo %>%
  select(fe:bi) %>%
  as.matrix()

Standardized data

Each variable is centered to mean 0 and scaled to standard deviation 1.

# PCA on standardized columns. scale() already centres and rescales the
# matrix, so prcomp() is told not to centre or scale a second time.
pca <- mat_compo %>%
  scale() %>%
  prcomp(center = FALSE, scale. = FALSE)
biplot(pca)

Logged data

# PCA on log-transformed data. The + 1 offset keeps log10() finite where a
# value is 0.
# The columns are centred (center = TRUE) so that the components describe
# variation around the column means; on uncentred data the first component
# mostly points towards the mean vector instead of the direction of largest
# variance. Variables are not rescaled to unit variance here.
pca <- prcomp(log10(mat_compo + 1), center = TRUE, scale. = FALSE)
biplot(pca)

Logged and standardized

# PCA on log10(x + 1) values that are then standardized with scale().
# Centring and scaling happen in scale(), so both are switched off in prcomp().
pca <- prcomp(
  scale(log10(1 + mat_compo)),
  center = FALSE,
  scale. = FALSE
)
biplot(x = pca)

Log-ratio transformed (isometric log-ratio, ilr)

# PCA on isometric log-ratio (ilr) coordinates from the compositions package.
# The coordinates are centred (center = TRUE) so that the components describe
# variation around the mean composition; without centring the first component
# is dominated by the location of the data rather than its spread.
# Coordinates are not rescaled to unit variance.
# NOTE(review): ilr coordinates are contrasts between groups of parts, so the
# biplot rays do not correspond to single elements - confirm this is the
# intended display (clr() would give one ray per element).
# NOTE(review): verify how zeros in mat_compo are treated by ilr() - TODO confirm.
pca <- prcomp(ilr(mat_compo), center = TRUE, scale. = FALSE)
biplot(pca)