1. Load Required Packages
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readxl)
library(ggplot2)
library(ggtern)
## Registered S3 methods overwritten by 'ggtern':
## method from
## grid.draw.ggplot ggplot2
## plot.ggplot ggplot2
## print.ggplot ggplot2
## --
## Remember to cite, run citation(package = 'ggtern') for further info.
## --
##
## Attaching package: 'ggtern'
## The following objects are masked from 'package:ggplot2':
##
## aes, annotate, ggplot, ggplot_build, ggplot_gtable, ggplotGrob,
## ggsave, layer_data, theme_bw, theme_classic, theme_dark,
## theme_gray, theme_light, theme_linedraw, theme_minimal, theme_void
#library(chemWater)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(ggcorrplot)
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
2. Load and Inspect Data
# Read the workbook into a tibble (readxl is attached in the package section).
df <- read_excel(path = "normalized_data (5).xlsx")
Inspect structure
# Compact overview of every column: name, type and leading values.
utils::str(df)
## tibble [286 × 18] (S3: tbl_df/tbl/data.frame)
## $ pH : num [1:286] -0.1675 0.1964 -0.8434 -0.0809 -0.7048 ...
## $ EC : num [1:286] -0.136 -0.225 -0.564 -0.323 -0.297 ...
## $ TDS (mg/L) : num [1:286] 0.0338 -0.1919 -0.5234 -0.1776 -0.1533 ...
## $ CO32- (mg/L) : num [1:286] -0.302 -0.302 -0.302 -0.302 -0.302 ...
## $ HCO3- (mg/L) : num [1:286] -0.182 0.124 -0.773 -0.15 -0.397 ...
## $ Cl- (mg/L) : num [1:286] -0.151 -0.43 -0.414 -0.422 -0.168 ...
## $ SO42- (mg/L) : num [1:286] 0.3545 -0.3841 -0.1045 0.0415 0.1102 ...
## $ NO3- (mg/L) : num [1:286] -0.0835 -0.1745 -0.0699 0.0607 -0.0931 ...
## $ Na+ (mg/L) : num [1:286] -0.0807 -0.4944 -0.4558 -0.2683 -0.2297 ...
## $ K+ (mg/L) : num [1:286] -0.443 0.561 -1.017 -0.759 -0.586 ...
## $ Ca2+ (mg/L) : num [1:286] -0.11733 0.84876 -0.53108 -0.00727 -0.20905 ...
## $ Mg2+ (mg/L) : num [1:286] 0.01535 -0.3031 -0.67527 -0.02685 -0.00767 ...
## $ Fe2+, mg/L) : num [1:286] -0.373 0.367 0.33 -0.432 -0.105 ...
## $ Total Alkalinity as CaCO3: num [1:286] -0.196 0.11 -0.787 -0.163 -0.411 ...
## $ Total Hardness as CaCO3 : num [1:286] -0.0626 0.3397 -0.6464 -0.0207 -0.1254 ...
## $ Sample Code : chr [1:286] "USGW001_D" "USGW001_W" "USGW002" "USGW003" ...
## $ Date : chr [1:286] "42903" "43163" "43163" "42902" ...
## $ Season : chr [1:286] "Dry" "Wet" "Wet" "Dry" ...
# Per-column summary statistics.
summary(object = df)
## pH EC TDS (mg/L) CO32- (mg/L)
## Min. :-3.18308 Min. :-0.9277 Min. :-0.8792 Min. :-0.3019
## 1st Qu.:-0.64847 1st Qu.:-0.5854 1st Qu.:-0.5530 1st Qu.:-0.3019
## Median : 0.02309 Median :-0.2142 Median :-0.2183 Median :-0.3019
## Mean : 0.00000 Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.46936 3rd Qu.: 0.1792 3rd Qu.: 0.1321 3rd Qu.:-0.3019
## Max. : 3.85317 Max. : 9.4140 Max. : 9.2411 Max. : 6.9619
## HCO3- (mg/L) Cl- (mg/L) SO42- (mg/L) NO3- (mg/L)
## Min. :-1.3159 Min. :-0.50167 Min. :-0.6379 Min. :-0.19653
## 1st Qu.:-0.7431 1st Qu.:-0.40996 1st Qu.:-0.5296 1st Qu.:-0.18717
## Median :-0.1206 Median :-0.32401 Median :-0.3655 Median :-0.15614
## Mean : 0.0000 Mean : 0.00000 Mean : 0.0000 Mean : 0.00000
## 3rd Qu.: 0.5716 3rd Qu.:-0.02678 3rd Qu.: 0.1095 3rd Qu.:-0.08715
## Max. : 6.6592 Max. :11.93478 Max. : 6.8345 Max. :10.79594
## Na+ (mg/L) K+ (mg/L) Ca2+ (mg/L) Mg2+ (mg/L)
## Min. :-0.55512 Min. :-1.2747 Min. :-1.4625 Min. :-1.1625
## 1st Qu.:-0.45293 1st Qu.:-0.6725 1st Qu.:-0.7160 1st Qu.:-0.7136
## Median :-0.34550 Median :-0.2567 Median :-0.1387 Median :-0.2801
## Mean : 0.00000 Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.:-0.02058 3rd Qu.: 0.4459 3rd Qu.: 0.4503 3rd Qu.: 0.3338
## Max. :10.41159 Max. : 7.5863 Max. : 3.1723 Max. : 4.0938
## Fe2+, mg/L) Total Alkalinity as CaCO3 Total Hardness as CaCO3
## Min. :-0.6196 Min. :-1.3303 Min. :-1.4205
## 1st Qu.:-0.5031 1st Qu.:-0.7509 1st Qu.:-0.7092
## Median :-0.3126 Median :-0.1200 Median :-0.1386
## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.1837 3rd Qu.: 0.5646 3rd Qu.: 0.4963
## Max. :12.5103 Max. : 6.6480 Max. : 3.9154
## Sample Code Date Season
## Length:286 Length:286 Length:286
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
# Preview the first six rows.
head(df, n = 6L)
## # A tibble: 6 × 18
## pH EC `TDS (mg/L)` `CO32- (mg/L)` `HCO3- (mg/L)` `Cl- (mg/L)`
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 -0.168 -0.136 0.0338 -0.302 -0.182 -0.151
## 2 0.196 -0.225 -0.192 -0.302 0.124 -0.430
## 3 -0.843 -0.564 -0.523 -0.302 -0.773 -0.414
## 4 -0.0809 -0.323 -0.178 -0.302 -0.150 -0.422
## 5 -0.705 -0.297 -0.153 -0.302 -0.397 -0.168
## 6 0.318 0.198 0.222 -0.0170 0.297 0.0405
## # ℹ 12 more variables: `SO42- (mg/L)` <dbl>, `NO3- (mg/L)` <dbl>,
## # `Na+ (mg/L)` <dbl>, `K+ (mg/L)` <dbl>, `Ca2+ (mg/L)` <dbl>,
## # `Mg2+ (mg/L)` <dbl>, `Fe2+, mg/L)` <dbl>,
## # `Total Alkalinity as CaCO3` <dbl>, `Total Hardness as CaCO3` <dbl>,
## # `Sample Code` <chr>, Date <chr>, Season <chr>
3. Principal Component Analysis (PCA)
# The first 15 columns (pH through Total Hardness) hold the numeric
# hydrochemical variables; the trailing columns are sample identifiers.
hydro_vars <- dplyr::select(df, pH:`Total Hardness as CaCO3`)

# No additional scaling is requested from prcomp().
pca_result <- prcomp(x = hydro_vars, scale. = FALSE)

# Standard deviation and (cumulative) proportion of variance per component
summary(pca_result)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.8880 1.3246 1.13151 0.98030 0.95006 0.80721 0.62597
## Proportion of Variance 0.5541 0.1166 0.08506 0.06384 0.05996 0.04329 0.02603
## Cumulative Proportion 0.5541 0.6706 0.75570 0.81955 0.87951 0.92280 0.94883
## PC8 PC9 PC10 PC11 PC12 PC13 PC14
## Standard deviation 0.5820 0.50528 0.39430 0.11265 0.06821 0.05837 4.699e-16
## Proportion of Variance 0.0225 0.01696 0.01033 0.00084 0.00031 0.00023 0.000e+00
## Cumulative Proportion 0.9713 0.98829 0.99862 0.99946 0.99977 1.00000 1.000e+00
## PC15
## Standard deviation 2.93e-16
## Proportion of Variance 0.00e+00
## Cumulative Proportion 1.00e+00
# Variable loadings: the rotation matrix returned by prcomp()
loadings <- pca_result[["rotation"]]
# Loadings on the first three components, rounded to two decimals
round(loadings[, seq_len(3)], digits = 2)
## PC1 PC2 PC3
## pH 0.02 0.68 -0.04
## EC 0.34 -0.03 -0.15
## TDS (mg/L) 0.34 -0.05 -0.16
## CO32- (mg/L) 0.02 0.63 -0.21
## HCO3- (mg/L) 0.32 -0.03 0.21
## Cl- (mg/L) 0.30 -0.06 -0.35
## SO42- (mg/L) 0.28 0.00 -0.25
## NO3- (mg/L) -0.01 -0.24 -0.26
## Na+ (mg/L) 0.30 -0.04 -0.35
## K+ (mg/L) 0.19 0.23 0.09
## Ca2+ (mg/L) 0.27 -0.01 0.41
## Mg2+ (mg/L) 0.30 0.02 0.20
## Fe2+, mg/L) 0.10 -0.09 -0.36
## Total Alkalinity as CaCO3 0.32 0.00 0.20
## Total Hardness as CaCO3 0.31 0.01 0.33
3.1 Scree Plot and Biplot
# Scree plot: component variances drawn as a line chart.
# (plot() on a prcomp object forwards to screeplot().)
screeplot(pca_result, type = "lines", main = "Scree Plot")

# Biplot: individuals drawn as blue points, variables drawn in red;
# repel = TRUE keeps text labels from overlapping.
fviz_pca_biplot(
  pca_result,
  geom.ind = "point",
  pointsize = 2,
  col.ind = "blue",
  col.var = "red",
  repel = TRUE,
  legend.title = "Groups"
)

4. Convert mg/L to meq/L
# Convert major-ion concentrations from mg/L to meq/L:
#   meq/L = (mg/L) / molar mass (g/mol) * charge magnitude
molar_mass <- c(Ca = 40.08, Mg = 24.31, Na = 22.99, K = 39.10, Cl = 35.45,
                SO4 = 96.06, HCO3 = 61.02, CO3 = 60.01)
valence <- c(Ca = 2, Mg = 2, Na = 1, K = 1, Cl = 1, SO4 = 2, HCO3 = 1, CO3 = 2)

# Columns of `df` that feed the conversion below.
ion_source_cols <- c(
  "Ca2+ (mg/L)", "Mg2+ (mg/L)", "Na+ (mg/L)", "K+ (mg/L)",
  "Cl- (mg/L)", "SO42- (mg/L)", "HCO3- (mg/L)", "CO32- (mg/L)"
)

# The conversion only makes sense for raw, non-negative concentrations.
# Standardized (z-scored) inputs contain negative values, which would produce
# meaningless equivalents and ion percentages downstream, so flag them loudly
# without aborting the script.
if (any(as.matrix(df[, ion_source_cols]) < 0, na.rm = TRUE)) {
  warning(
    "Negative ion concentrations found; the input looks standardized rather ",
    "than raw mg/L, so meq/L values and ion percentages will not be meaningful.",
    call. = FALSE
  )
}

# `[[` extracts the bare constant, so no names are carried into the new columns.
df_meq <- df %>% mutate(
  Ca = `Ca2+ (mg/L)` / molar_mass[["Ca"]] * valence[["Ca"]],
  Mg = `Mg2+ (mg/L)` / molar_mass[["Mg"]] * valence[["Mg"]],
  Na = `Na+ (mg/L)` / molar_mass[["Na"]] * valence[["Na"]],
  K = `K+ (mg/L)` / molar_mass[["K"]] * valence[["K"]],
  Cl = `Cl- (mg/L)` / molar_mass[["Cl"]] * valence[["Cl"]],
  SO4 = `SO42- (mg/L)` / molar_mass[["SO4"]] * valence[["SO4"]],
  HCO3 = `HCO3- (mg/L)` / molar_mass[["HCO3"]] * valence[["HCO3"]],
  CO3 = `CO32- (mg/L)` / molar_mass[["CO3"]] * valence[["CO3"]]
)
5. Prepare Data for Ternary/Piper Diagrams
# Express each cation / anion group as a percentage of its charge total.
# Later arguments of mutate() can use columns created by earlier ones, so the
# totals are defined immediately before the percentages that depend on them.
df_ions <- mutate(
  df_meq,
  # Cations
  Cations_total = Ca + Mg + Na + K,
  Ca_pct = 100 * (Ca / Cations_total),
  Mg_pct = 100 * (Mg / Cations_total),
  NaK_pct = 100 * ((Na + K) / Cations_total),
  # Anions
  Anions_total = Cl + SO4 + HCO3 + CO3,
  Cl_pct = 100 * (Cl / Anions_total),
  SO4_pct = 100 * (SO4 / Anions_total),
  HCO3CO3_pct = 100 * ((HCO3 + CO3) / Anions_total)
)
5.1 Piper Diagrams
# Cation ternary: Ca - Mg - (Na + K), in percent of total cations
cat_plot <- ggtern(data = df_ions, aes(x = Ca_pct, y = Mg_pct, z = NaK_pct)) +
  geom_point(color = "blue") +
  labs(title = "Cations") +
  theme_showarrows()

# Anion ternary: Cl - SO4 - (HCO3 + CO3), in percent of total anions
anion_plot <- ggtern(
  data = df_ions,
  aes(x = Cl_pct, y = SO4_pct, z = HCO3CO3_pct)
) +
  geom_point(color = "red") +
  labs(title = "Anions") +
  theme_showarrows()

# NOTE(review): this panel is a ternary plot of the element-wise average of the
# cation and anion percentages, not the diamond-shaped projection of a classical
# Piper diagram. Confirm this is the intended visualisation.
diamond_plot <- ggtern(data = df_ions, aes(
  x = (Ca_pct + Cl_pct) / 2,
  y = (Mg_pct + SO4_pct) / 2,
  z = (NaK_pct + HCO3CO3_pct) / 2
)) +
  geom_point(color = "purple") +
  labs(title = "Diamond") +
  theme_showarrows()

# Use ggtern's own grid.arrange(): gridExtra's version converts plots with
# ggplot2's ggplotGrob, which does not build ternary plots.
ggtern::grid.arrange(cat_plot, anion_plot, diamond_plot, ncol = 3)

6. Correlation Matrix
# Pairwise correlations between the major cations and anions
major_element_cols <- c(
  "Ca2+ (mg/L)", "Mg2+ (mg/L)", "Na+ (mg/L)", "K+ (mg/L)",
  "Cl- (mg/L)", "SO42- (mg/L)", "HCO3- (mg/L)"
)
major_elements <- df[, major_element_cols]

# "complete.obs" drops any row that has a missing value in these columns
corr <- cor(major_elements, use = "complete.obs")

# Lower-triangle correlogram, variables ordered by hierarchical clustering.
# theme_minimal() is namespace-qualified because ggtern masks the ggplot2
# version, and this is an ordinary (non-ternary) ggplot.
ggcorrplot(
  corr,
  method = "circle",
  type = "lower",
  hc.order = TRUE,
  lab = TRUE,
  lab_size = 3,
  colors = c("blue", "white", "red"),
  title = "Correlation Matrix of Major Elements",
  ggtheme = ggplot2::theme_minimal()
)

7. Pairwise Relationships
# Keep the hydrochemical variables, in this order, for the pairs plot.
df_elements <- dplyr::select(df, dplyr::all_of(c(
  "pH", "EC", "TDS (mg/L)", "CO32- (mg/L)", "HCO3- (mg/L)", "Cl- (mg/L)",
  "SO42- (mg/L)", "NO3- (mg/L)", "Na+ (mg/L)", "K+ (mg/L)",
  "Ca2+ (mg/L)", "Mg2+ (mg/L)", "Fe2+, mg/L)",
  "Total Alkalinity as CaCO3", "Total Hardness as CaCO3"
)))

# Pairs plot across all selected variables
ggpairs(df_elements)
