Load Libraries

library(readxl)
library(ggplot2)
library(dplyr)
library(ggsignif)
library(FSA)
library(multcompView)

Read and Combine Data

P03 <- read_excel("C:/Users/mnewma11/Desktop/MyMaurepas.xlsx", sheet = "P03")
P23 <- read_excel("C:/Users/mnewma11/Desktop/MyMaurepas.xlsx", sheet = "P23")
M03 <- read_excel("C:/Users/mnewma11/Desktop/MyMaurepas.xlsx", sheet = "M03")
M26 <- read_excel("C:/Users/mnewma11/Desktop/MyMaurepas.xlsx", sheet = "M26")
MM  <- read_excel("C:/Users/mnewma11/Desktop/MyMaurepas.xlsx", sheet = "MetalMass")
BindCap <- read_excel("C:/Users/mnewma11/Desktop/MyMaurepas.xlsx", sheet = "BindingCap")

# Metal columns
metal_cols <- c("Fe","Al","Ca","Mn","Mg","Cu","Ni","Pb","Zn")
mol_cols_no_ca <- c("Fe_mol","Al_mol","Mn_mol","Mg_mol","Cu_mol","Ni_mol","Pb_mol","Zn_mol")
# Convert BindCap row into a named vector
bind_vec <- as.numeric(BindCap[1, ])
names(bind_vec) <- names(BindCap)   # e.g., "Fe_mol", "Al_mol", ...

# Conversion + ratios + weighted binding
convert_to_mol <- function(df) {
  
  df2 <- df %>%
    # mg/kg → mol/kg
    mutate(across(
      all_of(metal_cols),
      ~ (.x / 1000) / MM[[cur_column()]],
      .names = "{.col}_mol"
    )) %>%
    
    # Fe:Al molar ratio
    mutate(Fe_Al_ratio = Fe_mol / Al_mol) %>%
    
    # Sum of Fe + Al mols
    mutate(Fe_Al_sum = Fe_mol + Al_mol)
  
  # Weighted binding capacity
  df2 <- df2 %>%
    mutate(across(
      all_of(names(bind_vec)),     # matches Fe_mol, Al_mol, Mn_mol, ...
      ~ .x * bind_vec[cur_column()],
      .names = "{.col}_weighted"
    )) %>%
    
    # Sum of weighted binding capacity
    rowwise() %>%
    mutate(total_weighted_binding = sum(c_across(ends_with("_weighted")))) %>%
    ungroup()
  
  return(df2)
}


# Apply to each sheet
P03_mol <- convert_to_mol(P03)
P23_mol <- convert_to_mol(P23)
M03_mol <- convert_to_mol(M03)
M26_mol <- convert_to_mol(M26)

combined <- bind_rows(
  P03_mol %>% mutate(site = "P03"),
  P23_mol %>% mutate(site = "P23"),
  M03_mol %>% mutate(site = "M03"),
  M26_mol %>% mutate(site = "M26")
)

# Set site order (edit as needed)
combined$site <- factor(combined$site, levels = c("M03", "M26", "P03", "P23"))

site_colors <- c("P03" = "#E76F51", "P23" = "#F4A261",
                  "M03" = "#2A9D8F", "M26" = "#264653")

Analysis Function

This function runs a Kruskal-Wallis test, Dunn’s post-hoc test with Bonferroni correction, and produces a labeled boxplot for a given variable. No outliers are removed — all data points are retained.

run_analysis <- function(df, value_col, group_col = "site",
                          fill_colors, title, y_label) {

  cat("\n===", value_col, "===\n")

  kw <- kruskal.test(reformulate(group_col, value_col), data = df)
  print(kw)

  dunn_result <- dunnTest(reformulate(group_col, value_col), data = df,
                           method = "bonferroni")
  print(dunn_result)

  p_vals <- dunn_result$res$P.adj
  names(p_vals) <- gsub(" - ", "-", dunn_result$res$Comparison)

  letters_out <- multcompLetters(p_vals)
  letters_df <- data.frame(site = names(letters_out$Letters),
                            letter = letters_out$Letters)

  label_pos <- df %>%
    group_by(.data[[group_col]]) %>%
    summarise(y_pos = max(.data[[value_col]], na.rm = TRUE) * 1.10) %>%
    left_join(letters_df, by = setNames("site", group_col))

  p <- ggplot(df, aes(x = .data[[group_col]], y = .data[[value_col]],
                       fill = .data[[group_col]])) +
    geom_boxplot(width = 0.6, outlier.shape = 21, outlier.size = 2,
                 outlier.color = "black", alpha = 1, linewidth = 0.5) +
    geom_text(data = label_pos, aes(x = .data[[group_col]], y = y_pos, label = letter),
              inherit.aes = FALSE, size = 5, fontface = "bold") +
    scale_fill_manual(values = fill_colors) +
    labs(title = title,
         subtitle = "Kruskal-Wallis w/ Dunn's post-hoc, Bonferroni correction, p < 0.05",
         x = "Site & Year", y = y_label) +
    theme_minimal(base_size = 14) +
    theme(
      plot.title = element_text(face = "bold", size = 16, hjust = 0),
      plot.subtitle = element_text(color = "black", size = 9),
      axis.title = element_text(face = "bold"),
      axis.line = element_line(colour = "black", linewidth = 0.01),
      panel.grid.major.x = element_blank(),
      legend.position = "none"
    )

  return(p)
}

Total Phosphorus (TP)

p_TP <- run_analysis(combined, "TP", fill_colors = site_colors,
                      title = "Total Phosphorus by Site", y_label = "TP (mg/kg)")
## 
## === TP ===
## 
##  Kruskal-Wallis rank sum test
## 
## data:  TP by site
## Kruskal-Wallis chi-squared = 93.965, df = 3, p-value < 2.2e-16
##   Comparison         Z      P.unadj        P.adj
## 1  M03 - M26 -3.874483 1.068512e-04 6.411069e-04
## 2  M03 - P03  2.866586 4.149258e-03 2.489555e-02
## 3  M26 - P03  9.312010 1.254352e-20 7.526112e-20
## 4  M03 - P23  2.131737 3.302848e-02 1.981709e-01
## 5  M26 - P23  8.448664 2.946554e-17 1.767932e-16
## 6  P03 - P23 -1.371784 1.701307e-01 1.000000e+00
p_TP
Total Phosphorus by site. Boxes sharing a letter are not significantly different (Dunn's test, p < 0.05).

Total Phosphorus by site. Boxes sharing a letter are not significantly different (Dunn’s test, p < 0.05).

Organic Matter (OM)

p_OM <- run_analysis(combined, "OM", fill_colors = site_colors,
                      title = "Organic Matter by Site", y_label = "OM (%)")
## 
## === OM ===
## 
##  Kruskal-Wallis rank sum test
## 
## data:  OM by site
## Kruskal-Wallis chi-squared = 189.42, df = 3, p-value < 2.2e-16
##   Comparison          Z      P.unadj        P.adj
## 1  M03 - M26 -4.8736462 1.095571e-06 6.573428e-06
## 2  M03 - P03  4.3877766 1.145153e-05 6.870918e-05
## 3  M26 - P03 12.7131323 4.999112e-37 2.999467e-36
## 4  M03 - P23 -0.8127981 4.163338e-01 1.000000e+00
## 5  M26 - P23  6.1402736 8.237944e-10 4.942767e-09
## 6  P03 - P23 -9.5088170 1.928430e-21 1.157058e-20
p_OM
Organic Matter by site. Boxes sharing a letter are not significantly different (Dunn's test, p < 0.05).

Organic Matter by site. Boxes sharing a letter are not significantly different (Dunn’s test, p < 0.05).

Aluminum (Al)

p_Al <- run_analysis(combined, "Al", fill_colors = site_colors,
                      title = "Aluminium by Site", y_label = "Al (mg/kg)")
## 
## === Al ===
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Al by site
## Kruskal-Wallis chi-squared = 8.011, df = 3, p-value = 0.04579
##   Comparison          Z    P.unadj      P.adj
## 1  M03 - M26 -0.0444061 0.96458070 1.00000000
## 2  M03 - P03  1.0594045 0.28941559 1.00000000
## 3  M26 - P03  1.4191630 0.15585151 0.93510905
## 4  M03 - P23  1.8408654 0.06564129 0.39384772
## 5  M26 - P23  2.4293963 0.01512399 0.09074394
## 6  P03 - P23  1.4072817 0.15934388 0.95606328
p_Al
Aluminum by site. Boxes sharing a letter are not significantly different (Dunn's test, p < 0.05).

Aluminum by site. Boxes sharing a letter are not significantly different (Dunn’s test, p < 0.05).

Iron (Fe)

p_Fe <- run_analysis(combined, "Fe", fill_colors = site_colors,
                      title = "Iron by Site", y_label = "Fe (mg/kg)")
## 
## === Fe ===
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Fe by site
## Kruskal-Wallis chi-squared = 36.834, df = 3, p-value = 4.988e-08
##   Comparison           Z      P.unadj        P.adj
## 1  M03 - M26 -0.87321735 3.825446e-01 1.000000e+00
## 2  M03 - P03  0.01078503 9.913950e-01 1.000000e+00
## 3  M26 - P03  1.28651257 1.982642e-01 1.000000e+00
## 4  M03 - P23 -3.16858181 1.531846e-03 9.191078e-03
## 5  M26 - P23 -2.78174966 5.406673e-03 3.244004e-02
## 6  P03 - P23 -5.77970190 7.483310e-09 4.489986e-08
p_Fe
Iron by site. Boxes sharing a letter are not significantly different (Dunn's test, p < 0.05).

Iron by site. Boxes sharing a letter are not significantly different (Dunn’s test, p < 0.05).

Fe/Al Ratio

p_FeAl <- run_analysis(combined, "Fe_Al_ratio", fill_colors = site_colors,
                      title = "Fe/Al Ratio by Site", y_label = "Fe/Al Ratio")
## 
## === Fe_Al_ratio ===
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Fe_Al_ratio by site
## Kruskal-Wallis chi-squared = 226.89, df = 3, p-value < 2.2e-16
##   Comparison           Z      P.unadj        P.adj
## 1  M03 - M26  -0.7440894 4.568224e-01 1.000000e+00
## 2  M03 - P03  -1.0486195 2.943533e-01 1.000000e+00
## 3  M26 - P03  -0.2561335 7.978478e-01 1.000000e+00
## 4  M03 - P23  -8.3654796 5.987089e-17 3.592253e-16
## 5  M26 - P23  -9.6456650 5.127773e-22 3.076664e-21
## 6  P03 - P23 -13.2876992 2.728394e-40 1.637036e-39
p_FeAl
Fe/Al Ratio by site. Boxes sharing a letter are not significantly different (Dunn's test, p < 0.05).

Fe/Al Ratio by site. Boxes sharing a letter are not significantly different (Dunn’s test, p < 0.05).

Fe+Al Sum

p_FeAl2 <- run_analysis(combined, "Fe_Al_sum", fill_colors = site_colors,
                      title = "Fe+Al Sum by Site", y_label = "Fe+Al Sum (mol/kg)")
## 
## === Fe_Al_sum ===
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Fe_Al_sum by site
## Kruskal-Wallis chi-squared = 2.3357, df = 3, p-value = 0.5057
##   Comparison          Z   P.unadj     P.adj
## 1  M03 - M26 -0.2106278 0.8331777 1.0000000
## 2  M03 - P03  0.8609535 0.3892637 1.0000000
## 3  M26 - P03  1.4077148 0.1592155 0.9552932
## 4  M03 - P23  0.5431785 0.5870069 1.0000000
## 5  M26 - P23  1.0079974 0.3134557 1.0000000
## 6  P03 - P23 -0.5884598 0.5562237 1.0000000
p_FeAl2
Fe+Al Sum by site. Boxes sharing a letter are not significantly different (Dunn's test, p < 0.05).

Fe+Al Sum by site. Boxes sharing a letter are not significantly different (Dunn’s test, p < 0.05).

Weighted Phosphate Binding Metals (exlcuding Ca)

All metals outlined on this sheet are converted to mol/kg multiplied by their molar ratio with phosphate. This is then summed.

p_total_weighted_binding <- run_analysis(combined, "total_weighted_binding", fill_colors = site_colors,
                      title = "Weighted Phosphate Binding Metals (exlcuding Ca) by Site", y_label = "Weighted Phosphate Binding Metals (exlcuding Ca) mol/kg")
## 
## === total_weighted_binding ===
## 
##  Kruskal-Wallis rank sum test
## 
## data:  total_weighted_binding by site
## Kruskal-Wallis chi-squared = 17.301, df = 3, p-value = 0.0006129
##   Comparison          Z      P.unadj       P.adj
## 1  M03 - M26  0.1333511 0.8939156903 1.000000000
## 2  M03 - P03 -0.3115107 0.7554124001 1.000000000
## 3  M26 - P03 -0.5888600 0.5559552094 1.000000000
## 4  M03 - P23 -2.2223366 0.0262605635 0.157563381
## 5  M26 - P23 -3.0276253 0.0024648349 0.014789009
## 6  P03 - P23 -3.4696611 0.0005211155 0.003126693
p_total_weighted_binding
Fe+Al Sum by site. Boxes sharing a letter are not significantly different (Dunn's test, p < 0.05).

Fe+Al Sum by site. Boxes sharing a letter are not significantly different (Dunn’s test, p < 0.05).

Manganese (Mn)

p_Mn <- run_analysis(combined, "Mn", fill_colors = site_colors,
                      title = "Manganese by Site", y_label = "Mn (mg/kg)")
## 
## === Mn ===
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Mn by site
## Kruskal-Wallis chi-squared = 75.873, df = 3, p-value = 2.355e-16
##   Comparison          Z      P.unadj        P.adj
## 1  M03 - M26 -0.7244221 4.688066e-01 1.000000e+00
## 2  M03 - P03  3.4148567 6.381559e-04 3.828935e-03
## 3  M26 - P03  5.4217214 5.902783e-08 3.541670e-07
## 4  M03 - P23  5.2458837 1.555350e-07 9.332099e-07
## 5  M26 - P23  7.8043170 5.982465e-15 3.589479e-14
## 6  P03 - P23  3.2856813 1.017361e-03 6.104163e-03
p_Mn
Iron by site. Boxes sharing a letter are not significantly different (Dunn's test, p < 0.05).

Iron by site. Boxes sharing a letter are not significantly different (Dunn’s test, p < 0.05).

Nickel (Ni)

p_Ni <- run_analysis(combined, "Ni", fill_colors = site_colors,
                      title = "Nickel by Site", y_label = "Ni (mg/kg)")
## 
## === Ni ===
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Ni by site
## Kruskal-Wallis chi-squared = 124.42, df = 3, p-value < 2.2e-16
##   Comparison          Z      P.unadj        P.adj
## 1  M03 - M26 -5.8349263 5.381435e-09 3.228861e-08
## 2  M03 - P03 -0.2194182 8.263243e-01 1.000000e+00
## 3  M26 - P03  8.1922859 2.563108e-16 1.537865e-15
## 4  M03 - P23 -5.2841413 1.262957e-07 7.577744e-07
## 5  M26 - P23  1.8319306 6.696177e-02 4.017706e-01
## 6  P03 - P23 -9.2040830 3.446009e-20 2.067605e-19
p_Ni
Nickel by site. Boxes sharing a letter are not significantly different (Dunn's test, p < 0.05).

Nickel by site. Boxes sharing a letter are not significantly different (Dunn’s test, p < 0.05).

Lead (Pb)

p_Pb <- run_analysis(combined, "Pb", fill_colors = site_colors,
                      title = "Lead by Site", y_label = "Pb (mg/kg)")
## 
## === Pb ===
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Pb by site
## Kruskal-Wallis chi-squared = 149.67, df = 3, p-value < 2.2e-16
##   Comparison           Z      P.unadj        P.adj
## 1  M03 - M26  3.39746061 6.801438e-04 4.080863e-03
## 2  M03 - P03  0.01026038 9.918135e-01 1.000000e+00
## 3  M26 - P03 -4.93871876 7.863753e-07 4.718252e-06
## 4  M03 - P23  6.43189417 1.260235e-10 7.561409e-10
## 5  M26 - P23  3.25144337 1.148206e-03 6.889239e-03
## 6  P03 - P23 11.67334669 1.744239e-31 1.046543e-30
p_Pb
Lead by site. Boxes sharing a letter are not significantly different (Dunn's test, p < 0.05).

Lead by site. Boxes sharing a letter are not significantly different (Dunn’s test, p < 0.05).

Copper (Cu)

p_Cu <- run_analysis(combined, "Cu", fill_colors = site_colors,
                      title = "Copper by Site", y_label = "Cu (mg/kg)")
## 
## === Cu ===
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Cu by site
## Kruskal-Wallis chi-squared = 110.99, df = 3, p-value < 2.2e-16
##   Comparison         Z      P.unadj        P.adj
## 1  M03 - M26  1.277892 2.012874e-01 1.000000e+00
## 2  M03 - P03 -1.079486 2.803712e-01 1.000000e+00
## 3  M26 - P03 -3.242657 1.184207e-03 7.105244e-03
## 4  M03 - P23  4.603891 4.146697e-06 2.488018e-05
## 5  M26 - P23  4.028382 5.616201e-05 3.369720e-04
## 6  P03 - P23 10.344983 4.409936e-25 2.645962e-24
p_Cu
Copper by site. Boxes sharing a letter are not significantly different (Dunn's test, p < 0.05).

Copper by site. Boxes sharing a letter are not significantly different (Dunn’s test, p < 0.05).

Magnesium (Mg)

p_Mg <- run_analysis(combined, "Mg", fill_colors = site_colors,
                      title = "Magnesium by Site", y_label = "Mg (mg/kg)")
## 
## === Mg ===
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Mg by site
## Kruskal-Wallis chi-squared = 65.525, df = 3, p-value = 3.872e-14
##   Comparison         Z      P.unadj        P.adj
## 1  M03 - M26  1.523732 1.275757e-01 7.654545e-01
## 2  M03 - P03 -4.079595 4.511428e-05 2.706857e-04
## 3  M26 - P03 -7.436587 1.033203e-13 6.199217e-13
## 4  M03 - P23 -1.405673 1.598211e-01 9.589267e-01
## 5  M26 - P23 -4.051154 5.096555e-05 3.057933e-04
## 6  P03 - P23  4.911911 9.019291e-07 5.411575e-06
p_Mg
Magnesium by site. Boxes sharing a letter are not significantly different (Dunn's test, p < 0.05).

Magnesium by site. Boxes sharing a letter are not significantly different (Dunn’s test, p < 0.05).

Zinc (Zn)

p_Zn <- run_analysis(combined, "Zn", fill_colors = site_colors,
                      title = "Zinc by Site", y_label = "Zn (mg/kg)")
## 
## === Zn ===
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Zn by site
## Kruskal-Wallis chi-squared = 26.739, df = 3, p-value = 6.677e-06
##   Comparison          Z      P.unadj        P.adj
## 1  M03 - M26 -1.5277831 1.265664e-01 7.593984e-01
## 2  M03 - P03  0.5917994 5.539849e-01 1.000000e+00
## 3  M26 - P03  2.9833718 2.850914e-03 1.710548e-02
## 4  M03 - P23  2.1001670 3.571415e-02 2.142849e-01
## 5  M26 - P23  4.9489574 7.461208e-07 4.476725e-06
## 6  P03 - P23  2.7345430 6.246694e-03 3.748016e-02
p_Zn
Zinc by site. Boxes sharing a letter are not significantly different (Dunn's test, p < 0.05).

Zinc by site. Boxes sharing a letter are not significantly different (Dunn’s test, p < 0.05).

Calcium (Ca)

p_Ca <- run_analysis(combined, "Ca", fill_colors = site_colors,
                      title = "Calcium by Site", y_label = "Ca (mg/kg)")
## 
## === Ca ===
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Ca by site
## Kruskal-Wallis chi-squared = 81.125, df = 3, p-value < 2.2e-16
##   Comparison         Z      P.unadj        P.adj
## 1  M03 - M26  1.313580 1.889875e-01 1.000000e+00
## 2  M03 - P03 -2.878858 3.991178e-03 2.394707e-02
## 3  M26 - P03 -5.595156 2.204237e-08 1.322542e-07
## 4  M03 - P23 -4.958544 7.102336e-07 4.261402e-06
## 5  M26 - P23 -8.303783 1.008478e-16 6.050869e-16
## 6  P03 - P23 -3.744424 1.808078e-04 1.084847e-03
p_Ca
Calcium by site. Boxes sharing a letter are not significantly different (Dunn's test, p < 0.05).

Calcium by site. Boxes sharing a letter are not significantly different (Dunn’s test, p < 0.05).

Notes

  • All outliers are retained in the boxplots and in the statistical tests.
  • Letters above each box come from a compact letter display (multcompLetters) based on Dunn’s pairwise comparisons; groups sharing a letter are not significantly different from each other.