Automated Belleair Chloride Report

Author

Ayesha Naveed

Published

May 16, 2025

Load packages

library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(ggplot2)
library(readxl)
library(purrr)
library(lubridate)

Attaching package: 'lubridate'
The following objects are masked from 'package:base':

    date, intersect, setdiff, union
library(scales)

Attaching package: 'scales'
The following object is masked from 'package:purrr':

    discard
library(here)
here() starts at /Users/ayeshanaveed/Downloads
library(janitor)

Attaching package: 'janitor'
The following objects are masked from 'package:stats':

    chisq.test, fisher.test
library(tibble)
library(tidyr)  

data_dir <- here("chloride_data")
excel_file <- here("BelleairChlorideReport.xlsx")

Clean Excel sheets

# Get sheet names
sheet_names <- excel_sheets(excel_file)

# Function to read each sheet with some warning suppression
read_sheet_safe <- function(sheet) {
  suppressMessages(
    suppressWarnings(
      read_excel(excel_file, sheet = sheet, .name_repair = "universal", guess_max = 10000)
    )
  )
}

# Read all sheets
results <- map(sheet_names, safely(read_sheet_safe))
names(results) <- sheet_names

# Identify sheets that failed to import (have only charts/figures)
failed_sheets <- names(results)[map_lgl(results, ~ !is.null(.x$error))]
print(failed_sheets)
character(0)
# Exclude sheets with only charts/figures
valid_sheets <- setdiff(sheet_names, failed_sheets)

# Read only sheets with data
all_data <- map(valid_sheets, read_sheet_safe) %>% set_names(valid_sheets)

CL Limits

cl_limits_all <- tibble::tribble(
  ~well_number, ~start_date, ~end_date, ~cl_value,
  2, as.Date("2007-07-01"), as.Date("2018-01-07"), 120,
  2, as.Date("2018-01-08"), as.Date("2021-12-06"), 135,
  3, as.Date("2007-07-01"), as.Date("2022-12-05"), 250,
  5, as.Date("2007-07-01"), as.Date("2017-12-03"), 140,
  5, as.Date("2017-12-03"), as.Date("2023-11-06"), 170,
  6, as.Date("2005-01-16"), as.Date("2017-11-06"), 130,
  6, as.Date("2017-11-06"), as.Date("2023-11-06"), 410,
  7, as.Date("2007-07-01"), as.Date("2017-11-06"), 170,
  7, as.Date("2017-12-04"), as.Date("2023-11-06"), 225,
  9, as.Date("2007-07-01"), as.Date("2017-11-06"), 100,
  9, as.Date("2017-11-06"), as.Date("2023-11-06"), 110,
  10, as.Date("2007-07-01"), as.Date("2017-11-06"), 80,
  10, as.Date("2017-12-04"), as.Date("2023-11-06"), 110
)

Function 1 - Plotting Period of Record for Production Wells

plot_prod_well <- function(well_number, all_data, cl_limits_all) {
  sheet_name <- paste0("Cl Well ", well_number)
  if (!sheet_name %in% names(all_data)) {
    stop(paste("Sheet", sheet_name, "not found in all_data."))
  }
  df <- all_data[[sheet_name]]
  df <- df[, !duplicated(names(df))]
  col_names <- names(df)
  
  # Find columns
  date_col   <- col_names[grepl("^Date$", col_names, ignore.case = TRUE)][1]
  cl_col     <- col_names[grepl(paste0("Well\\.", well_number, "\\.CL"), col_names)][1]
  loess_col  <- col_names[grepl("^LOESS", col_names)][1]
  
  if (is.na(date_col) | is.na(cl_col) | is.na(loess_col)) {
    stop("Could not find one or more required columns in the sheet: ",
         paste(c(date_col, cl_col, loess_col), collapse = ", "))
  }
  print(colnames(df))
  
  # Take care of Excel serial dates and filter date range
  df <- df %>%
    mutate(
      Date = if (inherits(.data[[date_col]], "Date")) {
        .data[[date_col]]
      } else if (inherits(.data[[date_col]], "POSIXct") || inherits(.data[[date_col]], "POSIXlt")) {
        as.Date(.data[[date_col]])
      } else if (is.numeric(.data[[date_col]])) {
        as.Date(.data[[date_col]], origin = "1899-12-30")
      } else {
        as.Date(as.character(.data[[date_col]]), format = "%m/%d/%y")
      },
      CL = as.numeric(.data[[cl_col]]),
      LOESS = as.numeric(.data[[loess_col]])
    ) %>%
    filter(
      !is.na(Date) & 
      Date > as.Date("1970-01-01") & Date < as.Date("2030-01-01") &
      !is.na(CL) & !is.na(LOESS)
    )
  
  # For x-axis: ticks at every January & labels every 5th year
  jan_years <- seq(
    from = as.Date(paste0(format(min(df$Date, na.rm=TRUE), "%Y"), "-01-01")),
    to   = as.Date(paste0(format(max(df$Date, na.rm=TRUE), "%Y"), "-01-01")),
    by = "year"
  )
  
  # Filter CL limits for each well
  cl_limits_df <- cl_limits_all %>%
    filter(well_number == !!well_number)
  
  p <- ggplot(df, aes(x = Date)) +
    geom_point(
      aes(y = CL, fill = "Chloride Concentrations"),
      shape = 23, size = 0.5, alpha = 0.5, color = "darkblue", stroke = 1.2
    ) +
    geom_line(aes(y = LOESS, color = "LOESS (Chloride Concentrations)"), size = 1) +
    geom_segment(
      data = cl_limits_df,
      aes(x = start_date, xend = end_date, y = cl_value, yend = cl_value, color = "CL Limit"),
      inherit.aes = FALSE,
      size = 1.2
    ) +
    scale_x_date(
      breaks = jan_years,
      labels = function(d) {
        y <- as.numeric(format(d, "%Y"))
        ifelse(y %% 5 == 0, format(d, "%b-%y"), "")
      },
      expand = expansion(mult = c(0.01, 0.01))
    ) +
    scale_color_manual(
      values = c(
        "LOESS (Chloride Concentrations)" = "magenta",
        "CL Limit" = "red"
      )
    ) +
    scale_fill_manual(
      values = c(
        "Chloride Concentrations" = "lightblue"
      )
    ) +
    labs(
      title = paste("Period of Record - Production Well", well_number),
      x = "Date",
      y = "Chloride Concentration (mg/L)",
      color = NULL,
      fill = NULL
    ) +
    theme_minimal() +
    theme(
      legend.position = "top",
      axis.text.x = element_text(angle = 0, hjust = 0.5)
    )
  
  print(p)
}

Plot Period of Record - Production Cl Wells

plot_prod_well(2, all_data, cl_limits_all)
 [1] "Date"               "Well.2.CL"          "LOESS.2023"        
 [4] "LOESS.2023.Fig.2.2" "...5"               "...6"              
 [7] "...7"               "...8"               "...9"              
[10] "...10"              "...11"              "...12"             
[13] "...13"              "...14"              "...15"             
[16] "...16"              "...17"              "...18"             
[19] "...19"              "...20"              "...21"             
[22] "...22"              "...23"              "...24"             
[25] "...25"              "...26"              "...27"             
[28] "...28"              "...29"              "...30"             
[31] "...31"              "...32"              "...33"             
[34] "...34"              "...35"              "...36"             
[37] "...37"              "...38"              "...39"             
[40] "...40"              "...41"              "...42"             
[43] "...43"              "...44"              "...45"             
[46] "...46"              "...47"              "...48"             
[49] "...49"              "...50"              "...51"             
[52] "...52"              "...53"              "...54"             
[55] "...55"              "...56"              "...57"             
[58] "...58"              "...59"              "...60"             
[61] "...61"              "...62"             
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

plot_prod_well(3, all_data, cl_limits_all)
 [1] "Date"           "Well.3.CL"      "LOESS.2023"     "LOESS.5yr.2023"
 [5] "...5"           "...6"           "...7"           "...8"          
 [9] "...9"           "...10"          "...11"          "...12"         
[13] "...13"          "...14"          "...15"          "...16"         
[17] "...17"          "...18"          "...19"          "...20"         
[21] "...21"          "...22"          "...23"          "...24"         
[25] "...25"          "...26"          "...27"          "...28"         
[29] "...29"          "...30"          "...31"          "...32"         
[33] "...33"          "...34"          "...35"          "...36"         
[37] "...37"          "...38"          "...39"          "...40"         
[41] "...41"         

plot_prod_well(5, all_data, cl_limits_all)
[1] "Date"              "Well.5.CL...2"     "LOESS.2023"       
[4] "...4"              "Date.5yr"          "Well.5.CL...6"    
[7] "..5.yr.Loess.2023"

plot_prod_well(6, all_data, cl_limits_all)
 [1] "Date"             "Well.6.CL"        "LOESS.2023"       "..5yr.LOESS.2023"
 [5] "...5"             "...6"             "...7"             "...8"            
 [9] "...9"             "...10"            "...11"            "...12"           
[13] "...13"            "...14"            "...15"            "...16"           
[17] "...17"            "...18"            "...19"           

plot_prod_well(7, all_data, cl_limits_all)
[1] "Date"             "Well.7.CL"        "LOESS.2023"       "..5yr.LOESS.2023"

plot_prod_well(9, all_data, cl_limits_all)
 [1] "Date"             "Well.9.CL"        "LOESS.2023"       "..5yr.LOESS.2023"
 [5] "...5"             "...6"             "...7"             "...8"            
 [9] "...9"             "...10"            "...11"            "...12"           
[13] "...13"            "...14"            "...15"            "...16"           
[17] "...17"            "...18"            "...19"            "...20"           
[21] "...21"            "...22"            "...23"            "...24"           
[25] "...25"            "...26"            "...27"            "...28"           
[29] "...29"            "...30"            "...31"           

plot_prod_well(10, all_data, cl_limits_all)
 [1] "Date"                 "Well.10.CL"           "LOESS.2023"          
 [4] "..5yr.LOESS.2023"     "...5"                 "...6"                
 [7] "...7"                 "...8"                 "...9"                
[10] "...10"                "...11"                "...12"               
[13] "...13"                "...14"                "...15"               
[16] "...16"                "...17"                "...18"               
[19] "...19"                "...20"                "...21"               
[22] "...22"                "...23"                "...24"               
[25] "...25"                "...26"                "...27"               
[28] "...28"                "...29"                "...30"               
[31] "...31"                "...32"                "...33"               
[34] "...34"                "...35"                "...36"               
[37] "...37"                "...38"                "...38459"            
[40] "..36.274527080846518" "...42"               

Function 2 - Plotting the Five-Year Period for Production Wells

plot_prod_5yr <- function(well_number, all_data, cl_limits_all) {
  date_start <- as.Date("2019-01-01")
  date_end   <- as.Date("2024-12-31")
  
  # Extract & clean the data
  sheet_name <- paste0("Cl Well ", well_number)
  if (!sheet_name %in% names(all_data)) {
    stop(paste("Sheet", sheet_name, "not found in all_data."))
  }
  df <- all_data[[sheet_name]]
  df <- df[, !duplicated(names(df))]
  col_names <- names(df)
  
  date_col   <- col_names[grepl("^Date$", col_names, ignore.case = TRUE)][1]
  cl_col     <- col_names[grepl(paste0("Well\\.", well_number, "\\.CL"), col_names)][1]
  loess_col  <- col_names[grepl("^LOESS", col_names)][1]
  
  if (is.na(date_col) | is.na(cl_col) | is.na(loess_col)) {
    stop("Could not find one or more required columns in the sheet: ",
         paste(c(date_col, cl_col, loess_col), collapse = ", "))
  }
  
  df <- df %>%
    mutate(
      Date = if (inherits(.data[[date_col]], "Date")) {
        .data[[date_col]]
      } else if (inherits(.data[[date_col]], "POSIXct") || inherits(.data[[date_col]], "POSIXlt")) {
        as.Date(.data[[date_col]])
      } else if (is.numeric(.data[[date_col]])) {
        as.Date(.data[[date_col]], origin = "1900-12-30")
      } else {
        as.Date(as.character(.data[[date_col]]), format = "%m/%d/%y")
      },
      CL = as.numeric(.data[[cl_col]]),
      LOESS = as.numeric(.data[[loess_col]])
    ) %>%
    filter(
      !is.na(Date),
      Date >= date_start & Date <= date_end,
      !is.na(CL),
      !is.na(LOESS)
    )
  
  # Get CL limits for the well and clip 
  cl_limits_df <- cl_limits_all %>%
    filter(well_number == !!well_number) %>%
    mutate(
      seg_start = pmax(start_date, date_start),
      seg_end   = pmin(end_date, date_end)
    ) %>%
    filter(seg_start <= seg_end) 
  
  # Set y-axis range to fit all data and CL limits
  y_max <- max(c(df$CL, df$LOESS, cl_limits_df$cl_value), na.rm = TRUE) * 1.15
  y_min <- min(c(df$CL, df$LOESS, cl_limits_df$cl_value), na.rm = TRUE) * 0.85
  y_min <- min(y_min, 0) 
  
  jan_years <- seq(date_start, date_end, by = "year")
  
  df$Legend <- "Chloride Concentrations"
  
  p <- ggplot(df, aes(x = Date)) +
    geom_point(aes(y = CL, fill = "Chloride Concentrations"), shape = 23, size = 0.5, alpha = 0.5, stroke = 1.2)+
    geom_line(aes(y = LOESS, color = "LOESS (Chloride Concentration)"), size = 1) +
    geom_segment(
      data = cl_limits_df,
      aes(x = seg_start, xend = seg_end, y = cl_value, yend = cl_value, color = "CL Limit"),
      size = 1,
      inherit.aes = FALSE
    ) +
    scale_color_manual(
      name = NULL,
      values = c(
        "Chloride Concentrations" = "darkblue",
        "LOESS (Chloride Concentration)" = "magenta",
        "CL Limit" = "red"
      ),
      breaks = c("CL Limit", "Chloride Concentrations", "LOESS (Chloride Concentration)")
    ) +scale_fill_manual(
  name = NULL,
  values = c("Chloride Concentrations" = "lightblue")
)+
    scale_x_date(
      limits = c(date_start, date_end),
      breaks = jan_years,
      labels = function(d) format(d, "%b-%y"),
      expand = expansion(mult = c(0.01, 0.01))
    ) +
    scale_y_continuous(
      limits = c(y_min, y_max),
      expand = expansion(mult = c(0, 0))
    ) +
    labs(
      title = paste("Five-Year Period - Production Well", well_number),
      x = "Date",
      y = "Chloride Concentration (mg/L)"
    ) +
    theme_minimal(base_size = 15) +
    theme(
      legend.position = "top",
      legend.box = "horizontal",
      axis.text.x = element_text(angle = 0, hjust = 0.5)
    )
  
  print(p)
}

Plot Five-Year Period - Production Cl Wells

plot_prod_5yr(2, all_data, cl_limits_all)

plot_prod_5yr(3, all_data, cl_limits_all)

plot_prod_5yr(5, all_data, cl_limits_all)

plot_prod_5yr(6, all_data, cl_limits_all)

plot_prod_5yr(7, all_data, cl_limits_all)

plot_prod_5yr(9, all_data, cl_limits_all)

plot_prod_5yr(10, all_data, cl_limits_all)

Importing and Standardizing Monitor Well Data

well_file <- "BelleairChlorideReport.xlsx"
well_sheets <- c("WL + Cl MW-4", "WL + Cl MW-20", "WL + Cl MW-21", "WL + Cl MW-25")
well_ranges <- c("A1:S100", "A1:W1100", "A1:W1100", "A1:W1100")
names(well_ranges) <- well_sheets

col_map <- list(
  "WL + Cl MW-4" = list(
    wl_date = "2023 MW4 WL Date",
    water_level = "MW4 WL",
    wl_loess = "2023 POR WL LOESS",
    cl_date = "MW-4 Cl Date",
    chloride = "MW-4 Cl",
    chloride_loess = "2023 POR Cl LOESS"
  ),
  "WL + Cl MW-20" = list(
    wl_date = "2023 MW20 WL Date",
    water_level = "MW20 WL",
    wl_loess = "2023 POR WL LOESS",
    cl_date = "2023 MW20 Cl Date",
    chloride = "MW20 Cl",
    chloride_loess = "2023 POR Cl LOESS"
  ),
  "WL + Cl MW-21" = list(
    wl_date = "2023 MW21 WL Date",
    water_level = "2023 WL",
    wl_loess = "2023 POR WL LOESS",
    cl_date = "2023 MW-21 Cl Date",
    chloride = "2023 MW21 Cl",
    chloride_loess = "2023 POR Cl LOESS"
  ),
  "WL + Cl MW-25" = list(
    wl_date = "2023 MW25 WL Date",
    water_level = "2023 WL",
    wl_loess = "2023 POR WL LOESS",
    cl_date = "2023 MW-25 Cl Date",
    chloride = "2023 MW25 Cl",
    chloride_loess = "2023 POR Cl LOESS"
  )
)

all_monitor_data <- list()

for (sheet in well_sheets) {
  dat <- read_excel(well_file, sheet = sheet, range = well_ranges[[sheet]])
  colnames(dat) <- trimws(colnames(dat))
  
  dat <- dat %>%
    select(
      wl_date        = any_of(col_map[[sheet]]$wl_date),
      water_level    = any_of(col_map[[sheet]]$water_level),
      wl_loess       = any_of(col_map[[sheet]]$wl_loess),
      cl_date        = any_of(col_map[[sheet]]$cl_date),
      chloride       = any_of(col_map[[sheet]]$chloride),
      chloride_loess = any_of(col_map[[sheet]]$chloride_loess)
    )
  
  # Add missing columns as NA 
  needed_cols <- c("wl_date", "water_level", "wl_loess", "cl_date", "chloride", "chloride_loess")
  for (col in needed_cols) {
    if (!col %in% names(dat)) dat[[col]] <- NA
  }
  
  dat <- dat %>%
    mutate(
      wl_date = as.Date(wl_date),
      cl_date = as.Date(cl_date),
      water_level = as.numeric(water_level),
      wl_loess = as.numeric(wl_loess),
      chloride = as.numeric(chloride),
      chloride_loess = as.numeric(chloride_loess)
    )
  
  # Join by date
  dat <- full_join(
    dat %>% select(date = wl_date, water_level, wl_loess),
    dat %>% select(date = cl_date, chloride, chloride_loess),
    by = "date"
  ) %>% filter(!is.na(date))
  
  all_monitor_data[[sheet]] <- dat
}
New names:
New names:
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
Warning in full_join(dat %>% select(date = wl_date, water_level, wl_loess), : Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 1052 of `x` matches multiple rows in `y`.
ℹ Row 321 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
  "many-to-many"` to silence this warning.
New names:
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
• `` -> `...16`
• `` -> `...17`
• `` -> `...18`
• `` -> `...19`
• `` -> `...20`
Warning in full_join(dat %>% select(date = wl_date, water_level, wl_loess), : Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 1051 of `x` matches multiple rows in `y`.
ℹ Row 317 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
  "many-to-many"` to silence this warning.
New names:
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
• `` -> `...16`
• `` -> `...17`
• `` -> `...18`
• `` -> `...19`
• `` -> `...20`

Function 3 - Plotting Period of Record for Monitor Wells

plot_dual_y <- function(dat, well_name) {
  dat <- dat[order(dat$date), ]  
  cl_range <- range(dat$chloride, na.rm = TRUE)
  wl_range <- range(dat$water_level, na.rm = TRUE)
  scale_factor <- ifelse(diff(wl_range) == 0, 1, diff(cl_range) / diff(wl_range))
  jan_years <- seq(1996, 2024, by = 4)
  x_breaks <- as.Date(paste0(jan_years, "-01-01"))
  wl_breaks <- seq(25, ceiling(max(wl_range, na.rm=TRUE)/5)*5, by=5)

  ggplot(dat, aes(x = date)) +
    geom_point(
      aes(y = chloride, color = "Chloride Concentration"),
      size = 1,
      fill = alpha("lightblue", 0.5),
      stroke = 1.2
    ) +
    geom_line(
      aes(y = chloride_loess, color = "LOESS Chloride Concentration"),
      size = 1.2, na.rm = TRUE
    ) +
    geom_line(
      aes(y = (water_level - 25) * scale_factor + min(cl_range), color = "Water Level"),
      linetype = "solid", size = 1, na.rm = TRUE
    ) +
    geom_line(
  aes(y = (wl_loess - 25) * scale_factor + min(cl_range)), color = "black", linetype = "dashed",
  size = 1.2,
  na.rm = TRUE,
  show.legend = FALSE        
  )+
    scale_shape_manual(
      name = NULL,
      values = c("Chloride Concentration" = 23)
    ) +
    scale_color_manual(
      name = NULL,
      values = c(
        "Chloride Concentration" = "darkblue",
        "LOESS Chloride Concentration" = "magenta",
        "Water Level" = "deepskyblue",
        "Water Level Loess" = "black"
      )
    ) +
    scale_y_continuous(
      name = "Chloride (mg/L)",
      sec.axis = sec_axis(
        ~ (. - min(cl_range)) / scale_factor + 25,
        name = "Water Level (ft)",
        breaks = wl_breaks
      )
    ) +
    scale_x_date(
      breaks = x_breaks,
      labels = function(d) format(d, "%b-%y"),
      expand = expansion(mult = c(0.01, 0.01))
    ) +
    labs(
      title = well_name,
      x = "Date"
    ) +
    theme_minimal(base_size = 14) +
    theme(
      axis.text.x = element_text(angle = 0, hjust = 1),
      legend.position = "top"
    )
}

Loop Monitor Well Plots

for (well in names(all_monitor_data)) {
  dat <- all_monitor_data[[well]]
  
  print(plot_dual_y(dat, well))
}
Warning: No shared levels found between `names(values)` of the manual scale and the
data's shape values.
Warning: Removed 98 rows containing missing values or values outside the scale range
(`geom_point()`).

Warning: No shared levels found between `names(values)` of the manual scale and the
data's shape values.
Warning: Removed 1040 rows containing missing values or values outside the scale range
(`geom_point()`).

Warning: No shared levels found between `names(values)` of the manual scale and the
data's shape values.
Removed 1040 rows containing missing values or values outside the scale range
(`geom_point()`).

Warning: No shared levels found between `names(values)` of the manual scale and the
data's shape values.
Warning: Removed 1069 rows containing missing values or values outside the scale range
(`geom_point()`).

Chloride vs. Time Nonparametric Correlations - Kendall tau-b, Monitoring Wells, Period of Record

monitor_sheets <- c("WL + Cl MW-4", "WL + Cl MW-20", "WL + Cl MW-21", "WL + Cl MW-25")

extract_chloride <- function(df) {
  date_col <- names(df)[grepl("date", names(df), ignore.case = TRUE)][1]
  cl_col <- names(df)[grepl("cl", names(df), ignore.case = TRUE) & 
                      !grepl("loess|limit|date", names(df), ignore.case = TRUE)][1]
  if (is.na(date_col) | is.na(cl_col)) return(NULL)
  tibble(
    Date = as.Date(df[[date_col]]),
    Chloride = as.numeric(df[[cl_col]])
  )
}

monitor_list <- map(monitor_sheets, function(sheet) {
  df <- all_data[[sheet]]
  dat <- extract_chloride(df)
  if (is.null(dat)) return(NULL)
  well_name <- gsub(".*MW-?([0-9]+).*", "mw\\1", sheet)
  dat %>% rename(!!well_name := Chloride)
})
monitor_list <- compact(monitor_list)

MW_df <- reduce(monitor_list, full_join, by = "Date") %>%
  arrange(Date) %>%
  filter(!is.na(Date))

MW_df$Date_num <- as.numeric(MW_df$Date)

get_kendall <- function(y, x) {
  idx <- !is.na(x) & !is.na(y)
  res <- cor.test(x[idx], y[idx], method = "kendall", exact = FALSE)
  c(
    sprintf("%.3f", as.numeric(res$estimate)),             # Correlation coefficient, 3 decimals
    formatC(res$p.value, format = "f", digits = 3),        # Sig, 3 decimals
    as.integer(sum(idx))                                   # N, integer
  )
}

wells <- grep("^mw", names(MW_df), value = TRUE)
results <- sapply(MW_df[wells], function(y) get_kendall(y, MW_df$Date_num))

all_complete <- complete.cases(MW_df[, wells])
date_row <- c("1.000", "0.000", as.integer(sum(all_complete)))

results <- cbind(Date = date_row, results)
rownames(results) <- c("Correlation-coffecient", "Sig (1 sided)", "N")
print(results)
                       Date    mw4     mw20    mw21     mw25   
Correlation-coffecient "1.000" "0.154" "0.686" "-0.325" "0.502"
Sig (1 sided)          "0.000" "0.000" "0.000" "0.000"  "0.000"
N                      "236"   "320"   "320"   "316"    "342"  

Chloride vs. Time Nonparametric Correlations- Kendall Tau-b, Monitoring Wells, 2018-2022

# Well labels for output
well_labels <- c("MW-4", "MW-20", "MW-21", "MW-25")
well_names  <- names(all_monitor_data)

# Prepare results matrix
result_mat <- matrix(NA, nrow = 3, ncol = length(well_names) + 1)
colnames(result_mat) <- c("Date", well_labels)
rownames(result_mat) <- c("Correlation-coefficient", "Sig (1 sided)", "N")

# Combine all dates for the Date column
all_dates <- unique(unlist(lapply(all_monitor_data, function(x) x$date)))
filtered_dates <- all_dates[all_dates >= as.Date("2018-01-01") & all_dates <= as.Date("2022-12-31")]
result_mat[ , "Date"] <- c("1.000", "0.000", as.character(length(filtered_dates)))

# Loop through each monitoring well
for (i in seq_along(well_names)) {
  dat <- all_monitor_data[[well_names[i]]] %>%
    filter(date >= as.Date("2018-01-01") & date <= as.Date("2022-12-31")) %>%
    select(date, chloride) %>%
    tidyr::drop_na()
  
  if (nrow(dat) >= 2) {
    res <- cor.test(
      x = as.numeric(dat$date),
      y = dat$chloride,
      method = "kendall",
      exact = FALSE
    )
    result_mat["Correlation-coefficient", i+1] <- sprintf("%.3f", as.numeric(res$estimate))
    result_mat["Sig (1 sided)", i+1] <- formatC(res$p.value, format = "f", digits = 3)
    result_mat["N", i+1] <- as.character(nrow(dat))
  } else {
    result_mat["Correlation-coefficient", i+1] <- NA
    result_mat["Sig (1 sided)", i+1] <- NA
    result_mat["N", i+1] <- as.character(nrow(dat))
  }
}

print(result_mat)
                        Date    MW-4 MW-20   MW-21   MW-25   
Correlation-coefficient "1.000" NA   "0.497" "0.396" "-0.418"
Sig (1 sided)           "0.000" NA   "0.000" "0.000" "0.000" 
N                       "113"   "0"  "60"    "57"    "57"    

Checking MW-4 Data for 2018-2022

range(all_monitor_data[["WL + Cl MW-4"]]$date, na.rm = TRUE)
[1] "1996-09-04" "2004-11-01"
# How many non-NA chloride values exist in 2018–2022?
sum(
     all_monitor_data[["WL + Cl MW-4"]]$date >= as.Date("2018-01-01") &
         all_monitor_data[["WL + Cl MW-4"]]$date <= as.Date("2022-12-31") &
       !is.na(all_monitor_data[["WL + Cl MW-4"]]$chloride)
 )
[1] 0