# 1. Load Required Packages ----

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readxl)
library(ggplot2)
library(ggtern)
## Registered S3 methods overwritten by 'ggtern':
##   method           from   
##   grid.draw.ggplot ggplot2
##   plot.ggplot      ggplot2
##   print.ggplot     ggplot2
## --
## Remember to cite, run citation(package = 'ggtern') for further info.
## --
## 
## Attaching package: 'ggtern'
## The following objects are masked from 'package:ggplot2':
## 
##     aes, annotate, ggplot, ggplot_build, ggplot_gtable, ggplotGrob,
##     ggsave, layer_data, theme_bw, theme_classic, theme_dark,
##     theme_gray, theme_light, theme_linedraw, theme_minimal, theme_void
#library(chemWater)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(ggcorrplot)
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2

# 2. Load and Inspect Data ----

# Read the workbook (default sheet) into a tibble.
# NOTE(review): the `Date` column is imported as character strings that look
# like Excel serial day numbers (e.g. "42903") -- confirm whether it should be
# converted to a Date before any time-based analysis.
df <- read_excel(path = "normalized_data (5).xlsx")

# Inspect structure

# Column types and leading values for every variable in the data set.
utils::str(df)
## tibble [286 × 18] (S3: tbl_df/tbl/data.frame)
##  $ pH                       : num [1:286] -0.1675 0.1964 -0.8434 -0.0809 -0.7048 ...
##  $ EC                       : num [1:286] -0.136 -0.225 -0.564 -0.323 -0.297 ...
##  $ TDS (mg/L)               : num [1:286] 0.0338 -0.1919 -0.5234 -0.1776 -0.1533 ...
##  $ CO32- (mg/L)             : num [1:286] -0.302 -0.302 -0.302 -0.302 -0.302 ...
##  $ HCO3- (mg/L)             : num [1:286] -0.182 0.124 -0.773 -0.15 -0.397 ...
##  $ Cl- (mg/L)               : num [1:286] -0.151 -0.43 -0.414 -0.422 -0.168 ...
##  $ SO42- (mg/L)             : num [1:286] 0.3545 -0.3841 -0.1045 0.0415 0.1102 ...
##  $ NO3- (mg/L)              : num [1:286] -0.0835 -0.1745 -0.0699 0.0607 -0.0931 ...
##  $ Na+ (mg/L)               : num [1:286] -0.0807 -0.4944 -0.4558 -0.2683 -0.2297 ...
##  $ K+ (mg/L)                : num [1:286] -0.443 0.561 -1.017 -0.759 -0.586 ...
##  $ Ca2+ (mg/L)              : num [1:286] -0.11733 0.84876 -0.53108 -0.00727 -0.20905 ...
##  $ Mg2+ (mg/L)              : num [1:286] 0.01535 -0.3031 -0.67527 -0.02685 -0.00767 ...
##  $ Fe2+, mg/L)              : num [1:286] -0.373 0.367 0.33 -0.432 -0.105 ...
##  $ Total Alkalinity as CaCO3: num [1:286] -0.196 0.11 -0.787 -0.163 -0.411 ...
##  $ Total Hardness as CaCO3  : num [1:286] -0.0626 0.3397 -0.6464 -0.0207 -0.1254 ...
##  $ Sample Code              : chr [1:286] "USGW001_D" "USGW001_W" "USGW002" "USGW003" ...
##  $ Date                     : chr [1:286] "42903" "43163" "43163" "42902" ...
##  $ Season                   : chr [1:286] "Dry" "Wet" "Wet" "Dry" ...
# Per-column summary: quartiles and mean for numeric columns, length and
# class for character columns.
summary(object = df)
##        pH                 EC            TDS (mg/L)       CO32- (mg/L)    
##  Min.   :-3.18308   Min.   :-0.9277   Min.   :-0.8792   Min.   :-0.3019  
##  1st Qu.:-0.64847   1st Qu.:-0.5854   1st Qu.:-0.5530   1st Qu.:-0.3019  
##  Median : 0.02309   Median :-0.2142   Median :-0.2183   Median :-0.3019  
##  Mean   : 0.00000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.46936   3rd Qu.: 0.1792   3rd Qu.: 0.1321   3rd Qu.:-0.3019  
##  Max.   : 3.85317   Max.   : 9.4140   Max.   : 9.2411   Max.   : 6.9619  
##   HCO3- (mg/L)       Cl- (mg/L)        SO42- (mg/L)      NO3- (mg/L)      
##  Min.   :-1.3159   Min.   :-0.50167   Min.   :-0.6379   Min.   :-0.19653  
##  1st Qu.:-0.7431   1st Qu.:-0.40996   1st Qu.:-0.5296   1st Qu.:-0.18717  
##  Median :-0.1206   Median :-0.32401   Median :-0.3655   Median :-0.15614  
##  Mean   : 0.0000   Mean   : 0.00000   Mean   : 0.0000   Mean   : 0.00000  
##  3rd Qu.: 0.5716   3rd Qu.:-0.02678   3rd Qu.: 0.1095   3rd Qu.:-0.08715  
##  Max.   : 6.6592   Max.   :11.93478   Max.   : 6.8345   Max.   :10.79594  
##    Na+ (mg/L)         K+ (mg/L)        Ca2+ (mg/L)       Mg2+ (mg/L)     
##  Min.   :-0.55512   Min.   :-1.2747   Min.   :-1.4625   Min.   :-1.1625  
##  1st Qu.:-0.45293   1st Qu.:-0.6725   1st Qu.:-0.7160   1st Qu.:-0.7136  
##  Median :-0.34550   Median :-0.2567   Median :-0.1387   Median :-0.2801  
##  Mean   : 0.00000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.:-0.02058   3rd Qu.: 0.4459   3rd Qu.: 0.4503   3rd Qu.: 0.3338  
##  Max.   :10.41159   Max.   : 7.5863   Max.   : 3.1723   Max.   : 4.0938  
##   Fe2+, mg/L)      Total Alkalinity as CaCO3 Total Hardness as CaCO3
##  Min.   :-0.6196   Min.   :-1.3303           Min.   :-1.4205        
##  1st Qu.:-0.5031   1st Qu.:-0.7509           1st Qu.:-0.7092        
##  Median :-0.3126   Median :-0.1200           Median :-0.1386        
##  Mean   : 0.0000   Mean   : 0.0000           Mean   : 0.0000        
##  3rd Qu.: 0.1837   3rd Qu.: 0.5646           3rd Qu.: 0.4963        
##  Max.   :12.5103   Max.   : 6.6480           Max.   : 3.9154        
##  Sample Code            Date              Season         
##  Length:286         Length:286         Length:286        
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
## 
# Preview the first six rows.
utils::head(df, n = 6L)
## # A tibble: 6 × 18
##        pH     EC `TDS (mg/L)` `CO32- (mg/L)` `HCO3- (mg/L)` `Cl- (mg/L)`
##     <dbl>  <dbl>        <dbl>          <dbl>          <dbl>        <dbl>
## 1 -0.168  -0.136       0.0338        -0.302          -0.182      -0.151 
## 2  0.196  -0.225      -0.192         -0.302           0.124      -0.430 
## 3 -0.843  -0.564      -0.523         -0.302          -0.773      -0.414 
## 4 -0.0809 -0.323      -0.178         -0.302          -0.150      -0.422 
## 5 -0.705  -0.297      -0.153         -0.302          -0.397      -0.168 
## 6  0.318   0.198       0.222         -0.0170          0.297       0.0405
## # ℹ 12 more variables: `SO42- (mg/L)` <dbl>, `NO3- (mg/L)` <dbl>,
## #   `Na+ (mg/L)` <dbl>, `K+ (mg/L)` <dbl>, `Ca2+ (mg/L)` <dbl>,
## #   `Mg2+ (mg/L)` <dbl>, `Fe2+, mg/L)` <dbl>,
## #   `Total Alkalinity as CaCO3` <dbl>, `Total Hardness as CaCO3` <dbl>,
## #   `Sample Code` <chr>, Date <chr>, Season <chr>

# 3. Principal Component Analysis (PCA) ----

# The first 15 columns (pH through Total Hardness) are the numeric
# hydrochemical variables; the remaining columns are sample metadata.
hydro_vars <- dplyr::select(df, pH:`Total Hardness as CaCO3`)

# No rescaling inside prcomp(): every numeric column already has mean 0 in the
# summary output, i.e. the inputs appear to be standardised upstream.
# NOTE(review): PC14 and PC15 have ~0 standard deviation, so two inputs are
# linear combinations of the others (presumably the alkalinity and hardness
# columns) -- consider dropping derived variables before the PCA.
pca_result <- stats::prcomp(hydro_vars, center = TRUE, scale. = FALSE)

# Standard deviation and (cumulative) proportion of variance per component
summary(pca_result)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.8880 1.3246 1.13151 0.98030 0.95006 0.80721 0.62597
## Proportion of Variance 0.5541 0.1166 0.08506 0.06384 0.05996 0.04329 0.02603
## Cumulative Proportion  0.5541 0.6706 0.75570 0.81955 0.87951 0.92280 0.94883
##                           PC8     PC9    PC10    PC11    PC12    PC13      PC14
## Standard deviation     0.5820 0.50528 0.39430 0.11265 0.06821 0.05837 4.699e-16
## Proportion of Variance 0.0225 0.01696 0.01033 0.00084 0.00031 0.00023 0.000e+00
## Cumulative Proportion  0.9713 0.98829 0.99862 0.99946 0.99977 1.00000 1.000e+00
##                            PC15
## Standard deviation     2.93e-16
## Proportion of Variance 0.00e+00
## Cumulative Proportion  1.00e+00
# Variable loadings (rotation matrix); show the first three components only,
# rounded to two decimals.
loadings <- pca_result[["rotation"]]
round(loadings[, c("PC1", "PC2", "PC3")], digits = 2)
##                             PC1   PC2   PC3
## pH                         0.02  0.68 -0.04
## EC                         0.34 -0.03 -0.15
## TDS (mg/L)                 0.34 -0.05 -0.16
## CO32- (mg/L)               0.02  0.63 -0.21
## HCO3- (mg/L)               0.32 -0.03  0.21
## Cl- (mg/L)                 0.30 -0.06 -0.35
## SO42- (mg/L)               0.28  0.00 -0.25
## NO3- (mg/L)               -0.01 -0.24 -0.26
## Na+ (mg/L)                 0.30 -0.04 -0.35
## K+ (mg/L)                  0.19  0.23  0.09
## Ca2+ (mg/L)                0.27 -0.01  0.41
## Mg2+ (mg/L)                0.30  0.02  0.20
## Fe2+, mg/L)                0.10 -0.09 -0.36
## Total Alkalinity as CaCO3  0.32  0.00  0.20
## Total Hardness as CaCO3    0.31  0.01  0.33

# 3.1 Scree Plot and Biplot ----

# Scree plot: component variances drawn as a line.
stats::screeplot(pca_result, type = "lines", main = "Scree Plot")

# Biplot: samples as blue points, variable loadings as red arrows with
# non-overlapping labels.
fviz_pca_biplot(
  pca_result,
  geom.ind = "point",
  pointsize = 2,
  col.ind = "blue",
  col.var = "red",
  repel = TRUE,
  legend.title = "Groups"
)

# 4. Convert mg/L to meq/L ----

# Molar mass (g/mol) and absolute ionic charge of each major ion.
molar_mass <- c(
  Ca = 40.08, Mg = 24.31, Na = 22.99, K = 39.10,
  Cl = 35.45, SO4 = 96.06, HCO3 = 61.02, CO3 = 60.01
)
valence <- c(Ca = 2, Mg = 2, Na = 1, K = 1, Cl = 1, SO4 = 2, HCO3 = 1, CO3 = 2)

# Column of `df` that holds the concentration of each ion.
ion_columns <- c(
  Ca = "Ca2+ (mg/L)", Mg = "Mg2+ (mg/L)", Na = "Na+ (mg/L)",
  K = "K+ (mg/L)", Cl = "Cl- (mg/L)", SO4 = "SO42- (mg/L)",
  HCO3 = "HCO3- (mg/L)", CO3 = "CO32- (mg/L)"
)

# meq/L = mg/L / molar mass * valence is only meaningful for real
# concentrations. Negative inputs mean the columns were transformed (e.g.
# z-scored), in which case the values below and every percentage derived from
# them are not chemically interpretable -- say so loudly instead of silently
# producing a plot.
has_negative <- vapply(
  ion_columns,
  function(col) any(df[[col]] < 0, na.rm = TRUE),
  logical(1)
)
if (any(has_negative)) {
  warning(
    "Negative values found in concentration column(s): ",
    paste(ion_columns[has_negative], collapse = ", "),
    ". The mg/L to meq/L conversion expects raw concentrations, ",
    "not standardised data.",
    call. = FALSE
  )
}

# `[[` extracts the bare scalar constant (no name attribute) for each ion.
df_meq <- df %>% mutate(
  Ca = `Ca2+ (mg/L)` / molar_mass[["Ca"]] * valence[["Ca"]],
  Mg = `Mg2+ (mg/L)` / molar_mass[["Mg"]] * valence[["Mg"]],
  Na = `Na+ (mg/L)` / molar_mass[["Na"]] * valence[["Na"]],
  K = `K+ (mg/L)` / molar_mass[["K"]] * valence[["K"]],
  Cl = `Cl- (mg/L)` / molar_mass[["Cl"]] * valence[["Cl"]],
  SO4 = `SO42- (mg/L)` / molar_mass[["SO4"]] * valence[["SO4"]],
  HCO3 = `HCO3- (mg/L)` / molar_mass[["HCO3"]] * valence[["HCO3"]],
  CO3 = `CO32- (mg/L)` / molar_mass[["CO3"]] * valence[["CO3"]]
)

# 5. Prepare Data for Ternary/Piper Diagrams ----

# Express each ion (or ion pair) as a percentage of its group total:
# cations = Ca + Mg + Na + K, anions = Cl + SO4 + HCO3 + CO3.
df_ions <- df_meq %>%
  mutate(
    Cations_total = Ca + Mg + Na + K,
    Ca_pct = 100 * (Ca / Cations_total),
    Mg_pct = 100 * (Mg / Cations_total),
    NaK_pct = 100 * ((Na + K) / Cations_total)
  ) %>%
  mutate(
    Anions_total = Cl + SO4 + HCO3 + CO3,
    Cl_pct = 100 * (Cl / Anions_total),
    SO4_pct = 100 * (SO4 / Anions_total),
    HCO3CO3_pct = 100 * ((HCO3 + CO3) / Anions_total)
  )

# 5.1 Piper Diagrams ----

# Cation ternary: Ca, Mg and Na+K shares of the cation total.
cat_plot <- ggtern(data = df_ions, aes(x = Ca_pct, y = Mg_pct, z = NaK_pct)) +
  geom_point(color = "blue") +
  labs(title = "Cations") +
  theme_showarrows()

# Anion ternary: Cl, SO4 and HCO3+CO3 shares of the anion total.
anion_plot <- ggtern(
  data = df_ions,
  aes(x = Cl_pct, y = SO4_pct, z = HCO3CO3_pct)
) +
  geom_point(color = "red") +
  labs(title = "Anions") +
  theme_showarrows()

# NOTE(review): this panel averages the cation and anion shares on ternary
# axes; a conventional Piper diamond instead projects the two triangles onto a
# rhombus -- confirm this is the intended figure.
diamond_plot <- ggtern(data = df_ions, aes(
  x = (Ca_pct + Cl_pct) / 2,
  y = (Mg_pct + SO4_pct) / 2,
  z = (NaK_pct + HCO3CO3_pct) / 2
)) +
  geom_point(color = "purple") +
  labs(title = "Diamond") +
  theme_showarrows()

# Arrange with ggtern's own grid.arrange(): ggtern ships a patched version of
# the gridExtra function specifically so that ternary plots are converted to
# grobs correctly.
ggtern::grid.arrange(cat_plot, anion_plot, diamond_plot, ncol = 3)

# 6. Correlation Matrix ----

# Correlations between seven major ions; rows with any missing value are
# dropped before computing.
major_elements <- dplyr::select(
  df,
  dplyr::all_of(c(
    "Ca2+ (mg/L)", "Mg2+ (mg/L)", "Na+ (mg/L)", "K+ (mg/L)",
    "Cl- (mg/L)", "SO42- (mg/L)", "HCO3- (mg/L)"
  ))
)
corr <- stats::cor(major_elements, use = "complete.obs")

# Lower-triangle circle plot with coefficient labels, variables ordered by
# hierarchical clustering.
# NOTE(review): ggtern masks theme_minimal(), so the call below resolves to
# ggtern's version -- confirm that is acceptable for this non-ternary plot.
ggcorrplot(
  corr,
  method = "circle",
  type = "lower",
  hc.order = TRUE,
  lab = TRUE,
  lab_size = 3,
  colors = c("blue", "white", "red"),
  title = "Correlation Matrix of Major Elements",
  ggtheme = theme_minimal()
)

# 7. Pairwise Relationships ----

# Keep the 15 numeric hydrochemical variables (sample code, date and season
# are left out) and draw the scatterplot matrix.
df_elements <- df[, c(
  "pH", "EC", "TDS (mg/L)", "CO32- (mg/L)", "HCO3- (mg/L)", "Cl- (mg/L)",
  "SO42- (mg/L)", "NO3- (mg/L)", "Na+ (mg/L)", "K+ (mg/L)",
  "Ca2+ (mg/L)", "Mg2+ (mg/L)", "Fe2+, mg/L)",
  "Total Alkalinity as CaCO3", "Total Hardness as CaCO3"
)]

ggpairs(data = df_elements)