library(readxl)
library(ggplot2)
library(dplyr)
library(ggsignif)
library(FSA)
library(multcompView)
P03 <- read_excel("C:/Users/mnewma11/Desktop/MyMaurepas.xlsx", sheet = "P03")
P23 <- read_excel("C:/Users/mnewma11/Desktop/MyMaurepas.xlsx", sheet = "P23")
M03 <- read_excel("C:/Users/mnewma11/Desktop/MyMaurepas.xlsx", sheet = "M03")
M26 <- read_excel("C:/Users/mnewma11/Desktop/MyMaurepas.xlsx", sheet = "M26")
MM <- read_excel("C:/Users/mnewma11/Desktop/MyMaurepas.xlsx", sheet = "MetalMass")
BindCap <- read_excel("C:/Users/mnewma11/Desktop/MyMaurepas.xlsx", sheet = "BindingCap")
# Metal columns
metal_cols <- c("Fe","Al","Ca","Mn","Mg","Cu","Ni","Pb","Zn")
mol_cols_no_ca <- c("Fe_mol","Al_mol","Mn_mol","Mg_mol","Cu_mol","Ni_mol","Pb_mol","Zn_mol")
# Convert BindCap row into a named vector
bind_vec <- as.numeric(BindCap[1, ])
names(bind_vec) <- names(BindCap) # e.g., "Fe_mol", "Al_mol", ...
# Conversion + ratios + weighted binding
convert_to_mol <- function(df) {
df2 <- df %>%
# mg/kg → mol/kg
mutate(across(
all_of(metal_cols),
~ (.x / 1000) / MM[[cur_column()]],
.names = "{.col}_mol"
)) %>%
# Fe:Al molar ratio
mutate(Fe_Al_ratio = Fe_mol / Al_mol) %>%
# Sum of Fe + Al mols
mutate(Fe_Al_sum = Fe_mol + Al_mol)
# Weighted binding capacity
df2 <- df2 %>%
mutate(across(
all_of(names(bind_vec)), # matches Fe_mol, Al_mol, Mn_mol, ...
~ .x * bind_vec[cur_column()],
.names = "{.col}_weighted"
)) %>%
# Sum of weighted binding capacity
rowwise() %>%
mutate(total_weighted_binding = sum(c_across(ends_with("_weighted")))) %>%
ungroup()
return(df2)
}
# Apply to each sheet
P03_mol <- convert_to_mol(P03)
P23_mol <- convert_to_mol(P23)
M03_mol <- convert_to_mol(M03)
M26_mol <- convert_to_mol(M26)
combined <- bind_rows(
P03_mol %>% mutate(site = "P03"),
P23_mol %>% mutate(site = "P23"),
M03_mol %>% mutate(site = "M03"),
M26_mol %>% mutate(site = "M26")
)
# Set site order (edit as needed)
combined$site <- factor(combined$site, levels = c("M03", "M26", "P03", "P23"))
site_colors <- c("P03" = "#E76F51", "P23" = "#F4A261",
"M03" = "#2A9D8F", "M26" = "#264653")
This function runs a Kruskal-Wallis test, Dunn’s post-hoc test with Bonferroni correction, and produces a labeled boxplot for a given variable. No outliers are removed — all data points are retained.
run_analysis <- function(df, value_col, group_col = "site",
fill_colors, title, y_label) {
cat("\n===", value_col, "===\n")
kw <- kruskal.test(reformulate(group_col, value_col), data = df)
print(kw)
dunn_result <- dunnTest(reformulate(group_col, value_col), data = df,
method = "bonferroni")
print(dunn_result)
p_vals <- dunn_result$res$P.adj
names(p_vals) <- gsub(" - ", "-", dunn_result$res$Comparison)
letters_out <- multcompLetters(p_vals)
letters_df <- data.frame(site = names(letters_out$Letters),
letter = letters_out$Letters)
label_pos <- df %>%
group_by(.data[[group_col]]) %>%
summarise(y_pos = max(.data[[value_col]], na.rm = TRUE) * 1.10) %>%
left_join(letters_df, by = setNames("site", group_col))
p <- ggplot(df, aes(x = .data[[group_col]], y = .data[[value_col]],
fill = .data[[group_col]])) +
geom_boxplot(width = 0.6, outlier.shape = 21, outlier.size = 2,
outlier.color = "black", alpha = 1, linewidth = 0.5) +
geom_text(data = label_pos, aes(x = .data[[group_col]], y = y_pos, label = letter),
inherit.aes = FALSE, size = 5, fontface = "bold") +
scale_fill_manual(values = fill_colors) +
labs(title = title,
subtitle = "Kruskal-Wallis w/ Dunn's post-hoc, Bonferroni correction, p < 0.05",
x = "Site & Year", y = y_label) +
theme_minimal(base_size = 14) +
theme(
plot.title = element_text(face = "bold", size = 16, hjust = 0),
plot.subtitle = element_text(color = "black", size = 9),
axis.title = element_text(face = "bold"),
axis.line = element_line(colour = "black", linewidth = 0.01),
panel.grid.major.x = element_blank(),
legend.position = "none"
)
return(p)
}
p_TP <- run_analysis(combined, "TP", fill_colors = site_colors,
title = "Total Phosphorus by Site", y_label = "TP (mg/kg)")
##
## === TP ===
##
## Kruskal-Wallis rank sum test
##
## data: TP by site
## Kruskal-Wallis chi-squared = 93.965, df = 3, p-value < 2.2e-16
## Comparison Z P.unadj P.adj
## 1 M03 - M26 -3.874483 1.068512e-04 6.411069e-04
## 2 M03 - P03 2.866586 4.149258e-03 2.489555e-02
## 3 M26 - P03 9.312010 1.254352e-20 7.526112e-20
## 4 M03 - P23 2.131737 3.302848e-02 1.981709e-01
## 5 M26 - P23 8.448664 2.946554e-17 1.767932e-16
## 6 P03 - P23 -1.371784 1.701307e-01 1.000000e+00
p_TP
Total Phosphorus by site. Boxes sharing a letter are not significantly different (Dunn’s test, p < 0.05).
p_OM <- run_analysis(combined, "OM", fill_colors = site_colors,
title = "Organic Matter by Site", y_label = "OM (%)")
##
## === OM ===
##
## Kruskal-Wallis rank sum test
##
## data: OM by site
## Kruskal-Wallis chi-squared = 189.42, df = 3, p-value < 2.2e-16
## Comparison Z P.unadj P.adj
## 1 M03 - M26 -4.8736462 1.095571e-06 6.573428e-06
## 2 M03 - P03 4.3877766 1.145153e-05 6.870918e-05
## 3 M26 - P03 12.7131323 4.999112e-37 2.999467e-36
## 4 M03 - P23 -0.8127981 4.163338e-01 1.000000e+00
## 5 M26 - P23 6.1402736 8.237944e-10 4.942767e-09
## 6 P03 - P23 -9.5088170 1.928430e-21 1.157058e-20
p_OM
Organic Matter by site. Boxes sharing a letter are not significantly different (Dunn’s test, p < 0.05).
p_Al <- run_analysis(combined, "Al", fill_colors = site_colors,
title = "Aluminium by Site", y_label = "Al (mg/kg)")
##
## === Al ===
##
## Kruskal-Wallis rank sum test
##
## data: Al by site
## Kruskal-Wallis chi-squared = 8.011, df = 3, p-value = 0.04579
## Comparison Z P.unadj P.adj
## 1 M03 - M26 -0.0444061 0.96458070 1.00000000
## 2 M03 - P03 1.0594045 0.28941559 1.00000000
## 3 M26 - P03 1.4191630 0.15585151 0.93510905
## 4 M03 - P23 1.8408654 0.06564129 0.39384772
## 5 M26 - P23 2.4293963 0.01512399 0.09074394
## 6 P03 - P23 1.4072817 0.15934388 0.95606328
p_Al
Aluminum by site. Boxes sharing a letter are not significantly different (Dunn’s test, p < 0.05).
p_Fe <- run_analysis(combined, "Fe", fill_colors = site_colors,
title = "Iron by Site", y_label = "Fe (mg/kg)")
##
## === Fe ===
##
## Kruskal-Wallis rank sum test
##
## data: Fe by site
## Kruskal-Wallis chi-squared = 36.834, df = 3, p-value = 4.988e-08
## Comparison Z P.unadj P.adj
## 1 M03 - M26 -0.87321735 3.825446e-01 1.000000e+00
## 2 M03 - P03 0.01078503 9.913950e-01 1.000000e+00
## 3 M26 - P03 1.28651257 1.982642e-01 1.000000e+00
## 4 M03 - P23 -3.16858181 1.531846e-03 9.191078e-03
## 5 M26 - P23 -2.78174966 5.406673e-03 3.244004e-02
## 6 P03 - P23 -5.77970190 7.483310e-09 4.489986e-08
p_Fe
Iron by site. Boxes sharing a letter are not significantly different (Dunn’s test, p < 0.05).
p_FeAl <- run_analysis(combined, "Fe_Al_ratio", fill_colors = site_colors,
title = "Fe/Al Ratio by Site", y_label = "Fe/Al Ratio")
##
## === Fe_Al_ratio ===
##
## Kruskal-Wallis rank sum test
##
## data: Fe_Al_ratio by site
## Kruskal-Wallis chi-squared = 226.89, df = 3, p-value < 2.2e-16
## Comparison Z P.unadj P.adj
## 1 M03 - M26 -0.7440894 4.568224e-01 1.000000e+00
## 2 M03 - P03 -1.0486195 2.943533e-01 1.000000e+00
## 3 M26 - P03 -0.2561335 7.978478e-01 1.000000e+00
## 4 M03 - P23 -8.3654796 5.987089e-17 3.592253e-16
## 5 M26 - P23 -9.6456650 5.127773e-22 3.076664e-21
## 6 P03 - P23 -13.2876992 2.728394e-40 1.637036e-39
p_FeAl
Fe/Al Ratio by site. Boxes sharing a letter are not significantly different (Dunn’s test, p < 0.05).
p_FeAl2 <- run_analysis(combined, "Fe_Al_sum", fill_colors = site_colors,
title = "Fe+Al Sum by Site", y_label = "Fe+Al Sum (mol/kg)")
##
## === Fe_Al_sum ===
##
## Kruskal-Wallis rank sum test
##
## data: Fe_Al_sum by site
## Kruskal-Wallis chi-squared = 2.3357, df = 3, p-value = 0.5057
## Comparison Z P.unadj P.adj
## 1 M03 - M26 -0.2106278 0.8331777 1.0000000
## 2 M03 - P03 0.8609535 0.3892637 1.0000000
## 3 M26 - P03 1.4077148 0.1592155 0.9552932
## 4 M03 - P23 0.5431785 0.5870069 1.0000000
## 5 M26 - P23 1.0079974 0.3134557 1.0000000
## 6 P03 - P23 -0.5884598 0.5562237 1.0000000
p_FeAl2
Fe+Al Sum by site. Boxes sharing a letter are not significantly different (Dunn’s test, p < 0.05).
All metals outlined on this sheet are converted to mol/kg multiplied by their molar ratio with phosphate. This is then summed.
p_total_weighted_binding <- run_analysis(combined, "total_weighted_binding", fill_colors = site_colors,
title = "Weighted Phosphate Binding Metals (exlcuding Ca) by Site", y_label = "Weighted Phosphate Binding Metals (exlcuding Ca) mol/kg")
##
## === total_weighted_binding ===
##
## Kruskal-Wallis rank sum test
##
## data: total_weighted_binding by site
## Kruskal-Wallis chi-squared = 17.301, df = 3, p-value = 0.0006129
## Comparison Z P.unadj P.adj
## 1 M03 - M26 0.1333511 0.8939156903 1.000000000
## 2 M03 - P03 -0.3115107 0.7554124001 1.000000000
## 3 M26 - P03 -0.5888600 0.5559552094 1.000000000
## 4 M03 - P23 -2.2223366 0.0262605635 0.157563381
## 5 M26 - P23 -3.0276253 0.0024648349 0.014789009
## 6 P03 - P23 -3.4696611 0.0005211155 0.003126693
p_total_weighted_binding
Fe+Al Sum by site. Boxes sharing a letter are not significantly different (Dunn’s test, p < 0.05).
p_Mn <- run_analysis(combined, "Mn", fill_colors = site_colors,
title = "Manganese by Site", y_label = "Mn (mg/kg)")
##
## === Mn ===
##
## Kruskal-Wallis rank sum test
##
## data: Mn by site
## Kruskal-Wallis chi-squared = 75.873, df = 3, p-value = 2.355e-16
## Comparison Z P.unadj P.adj
## 1 M03 - M26 -0.7244221 4.688066e-01 1.000000e+00
## 2 M03 - P03 3.4148567 6.381559e-04 3.828935e-03
## 3 M26 - P03 5.4217214 5.902783e-08 3.541670e-07
## 4 M03 - P23 5.2458837 1.555350e-07 9.332099e-07
## 5 M26 - P23 7.8043170 5.982465e-15 3.589479e-14
## 6 P03 - P23 3.2856813 1.017361e-03 6.104163e-03
p_Mn
Iron by site. Boxes sharing a letter are not significantly different (Dunn’s test, p < 0.05).
p_Ni <- run_analysis(combined, "Ni", fill_colors = site_colors,
title = "Nickel by Site", y_label = "Ni (mg/kg)")
##
## === Ni ===
##
## Kruskal-Wallis rank sum test
##
## data: Ni by site
## Kruskal-Wallis chi-squared = 124.42, df = 3, p-value < 2.2e-16
## Comparison Z P.unadj P.adj
## 1 M03 - M26 -5.8349263 5.381435e-09 3.228861e-08
## 2 M03 - P03 -0.2194182 8.263243e-01 1.000000e+00
## 3 M26 - P03 8.1922859 2.563108e-16 1.537865e-15
## 4 M03 - P23 -5.2841413 1.262957e-07 7.577744e-07
## 5 M26 - P23 1.8319306 6.696177e-02 4.017706e-01
## 6 P03 - P23 -9.2040830 3.446009e-20 2.067605e-19
p_Ni
Nickel by site. Boxes sharing a letter are not significantly different (Dunn’s test, p < 0.05).
p_Pb <- run_analysis(combined, "Pb", fill_colors = site_colors,
title = "Lead by Site", y_label = "Pb (mg/kg)")
##
## === Pb ===
##
## Kruskal-Wallis rank sum test
##
## data: Pb by site
## Kruskal-Wallis chi-squared = 149.67, df = 3, p-value < 2.2e-16
## Comparison Z P.unadj P.adj
## 1 M03 - M26 3.39746061 6.801438e-04 4.080863e-03
## 2 M03 - P03 0.01026038 9.918135e-01 1.000000e+00
## 3 M26 - P03 -4.93871876 7.863753e-07 4.718252e-06
## 4 M03 - P23 6.43189417 1.260235e-10 7.561409e-10
## 5 M26 - P23 3.25144337 1.148206e-03 6.889239e-03
## 6 P03 - P23 11.67334669 1.744239e-31 1.046543e-30
p_Pb
Lead by site. Boxes sharing a letter are not significantly different (Dunn’s test, p < 0.05).
p_Cu <- run_analysis(combined, "Cu", fill_colors = site_colors,
title = "Copper by Site", y_label = "Cu (mg/kg)")
##
## === Cu ===
##
## Kruskal-Wallis rank sum test
##
## data: Cu by site
## Kruskal-Wallis chi-squared = 110.99, df = 3, p-value < 2.2e-16
## Comparison Z P.unadj P.adj
## 1 M03 - M26 1.277892 2.012874e-01 1.000000e+00
## 2 M03 - P03 -1.079486 2.803712e-01 1.000000e+00
## 3 M26 - P03 -3.242657 1.184207e-03 7.105244e-03
## 4 M03 - P23 4.603891 4.146697e-06 2.488018e-05
## 5 M26 - P23 4.028382 5.616201e-05 3.369720e-04
## 6 P03 - P23 10.344983 4.409936e-25 2.645962e-24
p_Cu
Copper by site. Boxes sharing a letter are not significantly different (Dunn’s test, p < 0.05).
p_Mg <- run_analysis(combined, "Mg", fill_colors = site_colors,
title = "Magnesium by Site", y_label = "Mg (mg/kg)")
##
## === Mg ===
##
## Kruskal-Wallis rank sum test
##
## data: Mg by site
## Kruskal-Wallis chi-squared = 65.525, df = 3, p-value = 3.872e-14
## Comparison Z P.unadj P.adj
## 1 M03 - M26 1.523732 1.275757e-01 7.654545e-01
## 2 M03 - P03 -4.079595 4.511428e-05 2.706857e-04
## 3 M26 - P03 -7.436587 1.033203e-13 6.199217e-13
## 4 M03 - P23 -1.405673 1.598211e-01 9.589267e-01
## 5 M26 - P23 -4.051154 5.096555e-05 3.057933e-04
## 6 P03 - P23 4.911911 9.019291e-07 5.411575e-06
p_Mg
Magnesium by site. Boxes sharing a letter are not significantly different (Dunn’s test, p < 0.05).
p_Zn <- run_analysis(combined, "Zn", fill_colors = site_colors,
title = "Zinc by Site", y_label = "Zn (mg/kg)")
##
## === Zn ===
##
## Kruskal-Wallis rank sum test
##
## data: Zn by site
## Kruskal-Wallis chi-squared = 26.739, df = 3, p-value = 6.677e-06
## Comparison Z P.unadj P.adj
## 1 M03 - M26 -1.5277831 1.265664e-01 7.593984e-01
## 2 M03 - P03 0.5917994 5.539849e-01 1.000000e+00
## 3 M26 - P03 2.9833718 2.850914e-03 1.710548e-02
## 4 M03 - P23 2.1001670 3.571415e-02 2.142849e-01
## 5 M26 - P23 4.9489574 7.461208e-07 4.476725e-06
## 6 P03 - P23 2.7345430 6.246694e-03 3.748016e-02
p_Zn
Zinc by site. Boxes sharing a letter are not significantly different (Dunn’s test, p < 0.05).
p_Ca <- run_analysis(combined, "Ca", fill_colors = site_colors,
title = "Calcium by Site", y_label = "Ca (mg/kg)")
##
## === Ca ===
##
## Kruskal-Wallis rank sum test
##
## data: Ca by site
## Kruskal-Wallis chi-squared = 81.125, df = 3, p-value < 2.2e-16
## Comparison Z P.unadj P.adj
## 1 M03 - M26 1.313580 1.889875e-01 1.000000e+00
## 2 M03 - P03 -2.878858 3.991178e-03 2.394707e-02
## 3 M26 - P03 -5.595156 2.204237e-08 1.322542e-07
## 4 M03 - P23 -4.958544 7.102336e-07 4.261402e-06
## 5 M26 - P23 -8.303783 1.008478e-16 6.050869e-16
## 6 P03 - P23 -3.744424 1.808078e-04 1.084847e-03
p_Ca
Calcium by site. Boxes sharing a letter are not significantly different (Dunn’s test, p < 0.05).
multcompLetters) based on Dunn’s pairwise comparisons;
groups sharing a letter are not significantly different
from each other.