library(FactoMineR)
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.2.1
## ✔ purrr     1.0.4     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggrepel)
library(patchwork)
library(knitr)

# Load package if running from vignettes directory
# If you have a custom data loading function in your package, use it here
# Otherwise, comment this out and rely on data loading from CSV files below
# devtools::load_all("..")

opts_chunk$set(
  echo = FALSE,
  warning = FALSE,
  message = FALSE,
  fig.align = "center",
  out.width = "95%"
)

theme_set(
  theme_bw(base_size = 12) +
    theme(
      panel.grid.minor = element_blank(),
      strip.background = element_rect(fill = "#eeeeee"),
      strip.text = element_text(face = "bold"),
      plot.margin = margin(8, 8, 8, 8)
    )
)

Aim

This vignette performs a focused Principal Component Analysis (PCA) on the abiotic environmental variables that characterize the urbanization gradient across the three Trinidadian river systems (Maracas, Tacarigua, Tunapuna).

Key distinction from the main characterization vignette: This analysis includes ONLY abiotic predictors (drivers of the urbanization gradient) and explicitly EXCLUDES biological response variables (algal biomass and guppy population density). This ensures we capture the true environmental gradient without incorporating variables that are themselves responses to urbanization.

Variables included in the PCA (Abiotic Predictors Only):

  • Human population density - from spatial datasets (2020–2023)
  • Canopy cover - proportion of covered area (densiometer)
  • Temperature - water temperature (°C)
  • pH - unitless
  • Conductivity - electrical conductivity (μS/cm)
  • Dissolved oxygen - percentage saturation (%)
  • Total Nitrogen - nitrogen concentration (mg/L)
  • Nitrate - NO₃⁻ (mg/L)
  • Nitrite - NO₂⁻ (mg/L)
  • Ammonium - NH₄⁺ (mg/L)
  • Total Phosphorus - total phosphorus (mg/L)
  • Phosphate - PO₄³⁻ (mg/L)
  • Fecal Coliforms - bacterial indicator (counts/100mL)

Variables EXCLUDED (Biological Responses):

  • Chlorophyll a from benthic algae (Cyanobacteria, Diatoms)
  • Guppy population density

Data Preparation

## Sample of prepared data:
## # A tibble: 6 × 14
##   River   Origin PopDensity CanopyCover Temperature    pH Conductivity
##   <chr>   <fct>       <dbl>       <dbl>       <dbl> <dbl>        <dbl>
## 1 Maracas LU           538.       0.634        25.8  7.59          518
## 2 Maracas LU           447.       0.634        25.8  7.59          518
## 3 Maracas LU           361.       0.634        25.8  7.59          518
## 4 Maracas LU           278.       0.634        25.8  7.59          518
## 5 Maracas LU           278.       0.634        25.8  7.59          518
## 6 Maracas LU           221.       0.634        25.8  7.59          518
## # ℹ 7 more variables: DissolvedOxygen <dbl>, TotalN <dbl>, Nitrate <dbl>,
## #   Nitrite <dbl>, Ammonium <dbl>, TotalP <dbl>, FecalColiforms <dbl>
## 
## Data summary:
##     River           Origin    PopDensity       CanopyCover      
##  Length:36          LU:18   Min.   :  31.58   Min.   :0.004202  
##  Class :character   HU:18   1st Qu.: 339.97   1st Qu.:0.184874  
##  Mode  :character           Median :2900.59   Median :0.462185  
##                             Mean   :3129.46   Mean   :0.408263  
##                             3rd Qu.:5197.10   3rd Qu.:0.634454  
##                             Max.   :7538.50   Max.   :0.701681  
##   Temperature          pH         Conductivity DissolvedOxygen 
##  Min.   :25.80   Min.   :7.410   Min.   :222   Min.   : 50.50  
##  1st Qu.:26.60   1st Qu.:7.430   1st Qu.:457   1st Qu.: 51.10  
##  Median :27.60   Median :7.515   Median :533   Median : 93.25  
##  Mean   :27.93   Mean   :7.585   Mean   :489   Mean   : 84.97  
##  3rd Qu.:28.60   3rd Qu.:7.710   3rd Qu.:588   3rd Qu.:100.80  
##  Max.   :31.40   Max.   :7.930   Max.   :601   Max.   :120.90  
##      TotalN           Nitrate          Nitrite          Ammonium     
##  Min.   :0.09938   Min.   :0.0340   Min.   :0.0300   Min.   :0.1000  
##  1st Qu.:0.10999   1st Qu.:0.0450   1st Qu.:0.0380   1st Qu.:0.1200  
##  Median :0.21866   Median :0.1130   Median :0.0995   Median :0.2050  
##  Mean   :0.23223   Mean   :0.1345   Mean   :0.1232   Mean   :0.2117  
##  3rd Qu.:0.31281   3rd Qu.:0.1240   3rd Qu.:0.1180   3rd Qu.:0.3100  
##  Max.   :0.43386   Max.   :0.3780   Max.   :0.3540   Max.   :0.3300  
##      TotalP       FecalColiforms  
##  Min.   :0.0600   Min.   :   4.0  
##  1st Qu.:0.0800   1st Qu.:  45.0  
##  Median :0.1000   Median :  47.5  
##  Mean   :0.1783   Mean   : 557.3  
##  3rd Qu.:0.1200   3rd Qu.:1600.0  
##  Max.   :0.6100   Max.   :1600.0

Principal Component Analysis

1. PCA Biplot: PC1 vs PC2 with Variable Loadings


2. PCA Loadings Table

## 
## ## PCA Loadings (Variable Contributions to Principal Components)
##           Variable    PC1    PC2    PC3
## 1       PopDensity  0.741 -0.498  0.171
## 2      CanopyCover -0.803  0.066 -0.579
## 3      Temperature  0.961 -0.111  0.166
## 4               pH -0.291 -0.022  0.936
## 5     Conductivity  0.369 -0.893  0.189
## 6  DissolvedOxygen -0.555  0.485  0.644
## 7           TotalN  0.827  0.561 -0.021
## 8          Nitrate  0.972  0.176  0.074
## 9          Nitrite  0.976  0.189  0.038
## 10        Ammonium  0.573  0.797 -0.081
## 11          TotalP  0.982 -0.041 -0.110
## 12  FecalColiforms  0.454 -0.249 -0.281
## 
## 
## ## Variance Explained by Each Component
##         PC Variance_Explained Cumulative_Variance
## comp 1 PC1            5604.00             5604.00
## comp 2 PC2            1982.10             7586.10
## comp 3 PC3            1519.30             9105.40
## comp 4 PC4             772.94             9878.34
## comp 5 PC5              87.07             9965.42
## 
## 
## ## Scree Plot


Interpretation of Loadings

PC1 Loadings (Urbanization Gradient):

Strongest positive contributors (urban signature): - TotalP: 0.982 - Nitrite: 0.976 - Nitrate: 0.972 - Temperature: 0.961 - TotalN: 0.827

Strongest negative contributors (pristine signature):

PC2 Loadings (Secondary Gradient):

Strongest positive contributors: - Ammonium: 0.797 - TotalN: 0.561 - DissolvedOxygen: 0.485

Strongest negative contributors: - Conductivity: -0.893 - PopDensity: -0.498


3. Nutrient Axes: N vs P Mapping & Eutrophication Axis

This section examines whether urbanization creates a unified “eutrophication” axis (N and P enriched together) or separate N and P axes (differential enrichment).

## ### Nutrient Variable Loadings on PC1 and PC2
## **Nitrogen species:**
##   Variable   PC1   PC2    PC3
## 1 Ammonium 0.573 0.797 -0.081
## 2  Nitrate 0.972 0.176  0.074
## 3  Nitrite 0.976 0.189  0.038
## 4   TotalN 0.827 0.561 -0.021
## 
## **Phosphorus:**
##   Variable   PC1    PC2   PC3
## 1   TotalP 0.982 -0.041 -0.11
## 
## 
## ### N-P Axis Analysis
## Angle between mean N vector and P vector: 29.6 degrees
## 
## **Interpretation:**
## - **COUPLED nutrient enrichment**: N and P load similarly (unified eutrophication axis)
## - Urban sites are enriched in BOTH N and P proportionally
## - Limited stoichiometric imbalance from urbanization


4. Site Scores: PC1 vs PC2 Colored by Urbanization

Statistical Separation of HU and LU Sites

## 
## 
## ### By-River Summary of PC1 Scores
## 
## 
## Table: PC1 scores by river and urbanization level
## 
## |River     |Origin | Mean_PC1| SD_PC1|  N|
## |:---------|:------|--------:|------:|--:|
## |Maracas   |LU     |   -2.253|  0.014|  6|
## |Maracas   |HU     |    0.186|  0.152|  6|
## |Tacarigua |LU     |   -1.200|  0.000|  6|
## |Tacarigua |HU     |   -1.239|  0.012|  6|
## |Tunapuna  |LU     |   -1.070|  0.039|  6|
## |Tunapuna  |HU     |    5.576|  0.123|  6|

Summary & Interpretation

Key Findings

PC1: The Urbanization Gradient

PC1 represents the primary urbanization gradient, explaining the majority of variance in the dataset.

  • Positive direction (High Urbanization): Elevated conductivity, nutrients (N & P), temperature, fecal coliforms
  • Negative direction (Low Urbanization): Higher canopy cover, dissolved oxygen, lower nutrient concentrations

PC2: Secondary Environmental Variation

PC2 likely captures: - Geomorphological variation among rivers - Specific nutrient forms (ammonium vs. nitrate) - Secondary disturbance factors

Nutrient Enrichment Pattern

The analysis reveals whether urbanization creates: - A unified eutrophication axis (N and P enrich proportionally) - Separate N and P axes (differential enrichment creates stoichiometric imbalances)

The angle between N and P vectors indicates the degree of coupling between these nutrient types.

Site Separation

HU and LU sites show significant separation along PC1 in the biplot above, indicating that abiotic environmental differences are substantial between urbanization levels.


Next Steps

This PCA characterization will serve as:

  1. An ecological predictor in subsequent analyses of guppy stoichiometry, life-history traits, and diet
  2. A validation of our HU/LU site classification
  3. A framework for understanding whether nutrient imbalances drive guppy elemental phenotype

Session Information

## R version 4.4.2 (2024-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sequoia 15.7.4
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] knitr_1.49       patchwork_1.3.0  ggrepel_0.9.6    lubridate_1.9.4 
##  [5] forcats_1.0.0    stringr_1.5.1    dplyr_1.1.4      purrr_1.0.4     
##  [9] readr_2.1.5      tidyr_1.3.1      tibble_3.2.1     tidyverse_2.0.0 
## [13] factoextra_1.0.7 ggplot2_3.5.2    FactoMineR_2.11 
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.6         xfun_0.51            bslib_0.9.0         
##  [4] htmlwidgets_1.6.4    rstatix_0.7.2        lattice_0.22-6      
##  [7] tzdb_0.4.0           vctrs_0.6.5          tools_4.4.2         
## [10] generics_0.1.4       sandwich_3.1-1       cluster_2.1.6       
## [13] pkgconfig_2.0.3      Matrix_1.7-1         RColorBrewer_1.1-3  
## [16] scatterplot3d_0.3-44 lifecycle_1.0.5      compiler_4.4.2      
## [19] farver_2.1.2         leaps_3.2            codetools_0.2-20    
## [22] carData_3.0-5        htmltools_0.5.8.1    sass_0.4.9          
## [25] yaml_2.3.10          Formula_1.2-5        car_3.1-3           
## [28] ggpubr_0.6.0         pillar_1.10.2        jquerylib_0.1.4     
## [31] MASS_7.3-61          flashClust_1.01-2    DT_0.33             
## [34] cachem_1.1.0         abind_1.4-8          multcomp_1.4-28     
## [37] tidyselect_1.2.1     digest_0.6.37        mvtnorm_1.3-3       
## [40] stringi_1.8.7        labeling_0.4.3       splines_4.4.2       
## [43] fastmap_1.2.0        grid_4.4.2           cli_3.6.5           
## [46] magrittr_2.0.3       utf8_1.2.5           survival_3.7-0      
## [49] broom_1.0.7          TH.data_1.1-3        withr_3.0.2         
## [52] backports_1.5.0      scales_1.4.0         estimability_1.5.1  
## [55] timechange_0.3.0     rmarkdown_2.29       emmeans_1.10.7      
## [58] ggsignif_0.6.4       zoo_1.8-13           hms_1.1.3           
## [61] coda_0.19-4.1        evaluate_1.0.5       rlang_1.1.7         
## [64] Rcpp_1.1.0           xtable_1.8-4         glue_1.8.0          
## [67] rstudioapi_0.17.1    jsonlite_2.0.0       R6_2.6.1            
## [70] multcompView_0.1-10