Final Project for Data Analysis and Visualization with R

An R workflow to process, analyze, and visualize select water quality data from Belleair, FL.


Note: The following packages must be loaded to replicate the subsequent scripts:

library(tidyverse)
library(readxl)
library(scales)
library(kableExtra)

Prepare the original Excel Dataset so that we can extract the necessary tibbles for analysis and visualization.

# Import starting Excel data
df <- read_excel("~/Documents/Hunter College/DataVisR/Week_11-15/Data/AF210121 MAIN DATABASE WQ & PUMP_WY2023.xlsx", sheet = "Data", col_types = "text")

# Edit the column names so that they occupy one row 
names(df) <- paste(names(df), df[1, ], sep = "_") #merge row 1 and 2, separated by "_"
df <- df[-1,] #remove the 2nd, now empty, row
colnames(df)[1] <- "Date" #rename the first column "Date"


Examine the structure of “df” aka Tab 1 of the original spreadsheet, containing all the data. In later sections, we will use this to identify the column names needed to make the desired plots.

str(df)
## tibble [12,308 × 66] (S3: tbl_df/tbl/data.frame)
##  $ Date                         : chr [1:12308] "28914" "28945" "28975" "29006" ...
##  $ Wells
## 2 &5_CL Limit       : chr [1:12308] NA NA NA NA ...
##  $ Well 3_CL Limit              : chr [1:12308] NA NA NA NA ...
##  $ Well 5_CL Limit              : chr [1:12308] NA NA NA NA ...
##  $ Well 6_CL Limit              : chr [1:12308] NA NA NA NA ...
##  $ Well 7_CL Limit              : chr [1:12308] NA NA NA NA ...
##  $ Well 9_CL Limit              : chr [1:12308] NA NA NA NA ...
##  $ Well 10_CL Limit             : chr [1:12308] NA NA NA NA ...
##  $ Well 2 CL_(mg/L)             : chr [1:12308] "44" "40" "52" "44" ...
##  $ Well 3 CL_(mg/L)             : chr [1:12308] "40" "40" "40" "44" ...
##  $ Well 5 CL_(mg/L)             : chr [1:12308] NA NA NA "60" ...
##  $ Well 6 CL_(mg/L)             : chr [1:12308] NA "52" "60" "48" ...
##  $ Well 7 CL_(mg/L)             : chr [1:12308] NA NA NA NA ...
##  $ Well 9 CL_(mg/L)             : chr [1:12308] NA NA NA NA ...
##  $ Well 10 CL_(mg/L)            : chr [1:12308] NA NA NA NA ...
##  $ Well 2 TDS_(mg/L)            : chr [1:12308] NA NA NA NA ...
##  $ Well 3 TDS_(mg/L)            : chr [1:12308] NA NA NA NA ...
##  $ Well 5 TDS_(mg/L)            : chr [1:12308] NA NA NA NA ...
##  $ Well 6 TDS_(mg/L)            : chr [1:12308] NA NA NA NA ...
##  $ Well 7 TDS_(mg/L)            : chr [1:12308] NA NA NA NA ...
##  $ Well 9 TDS_(mg/L)            : chr [1:12308] NA NA NA NA ...
##  $ Well 10 TDS_(mg/L)           : chr [1:12308] NA NA NA NA ...
##  $ Well 2 SO4_(mg/L)            : chr [1:12308] NA NA NA NA ...
##  $ Well 3 SO4_(mg/L)            : chr [1:12308] NA NA NA NA ...
##  $ Well 5 SO4_(mg/L)            : chr [1:12308] NA NA NA NA ...
##  $ Well 6 SO4_(mg/L)            : chr [1:12308] NA NA NA NA ...
##  $ Well 7 SO4_(mg/L)            : chr [1:12308] NA NA NA NA ...
##  $ Well 9 SO4_(mg/L)            : chr [1:12308] NA NA NA NA ...
##  $ Well 10 SO4_(mg/L)           : chr [1:12308] NA NA NA NA ...
##  $ ...30_NA                     : chr [1:12308] NA NA NA NA ...
##  $ ...31_NA                     : chr [1:12308] NA NA NA NA ...
##  $ DATE_(Daily)                 : chr [1:12308] "32874" "32875" "32876" "32877" ...
##  $ Well 2 PUMP_(GPD)            : chr [1:12308] "270300" "291300" "261900" "274400" ...
##  $ Well 3 PUMP_(GPD)            : chr [1:12308] "180600" "192300" "170200" "183000" ...
##  $ Well 4 PUMP_(GPD)            : chr [1:12308] NA NA "112300" "1600" ...
##  $ Well 5 PUMP_(GPD)            : chr [1:12308] "335800" "363100" "319600" "342600" ...
##  $ Well 6 PUMP_(GPD)            : chr [1:12308] "0" "0" "0" "0" ...
##  $ Well 7 PUMP_(GPD)            : chr [1:12308] NA NA NA NA ...
##  $ Well 9 PUMP_(GPD)            : chr [1:12308] NA NA NA NA ...
##  $ Well 10 PUMP_(GPD)           : chr [1:12308] NA NA NA NA ...
##  $ All Wells Daily_(GPD)        : chr [1:12308] "786700" "846700" "864000" "801600" ...
##  $ Date_Monthly                 : chr [1:12308] "32904" "32932" "32963" "32993" ...
##  $ All Wells Monthly_(Gal/Month): chr [1:12308] "26062300" "24418749" "33141500" "31449500" ...
##  $ ...44_NA                     : chr [1:12308] NA NA NA NA ...
##  $ ...45_NA                     : chr [1:12308] NA NA NA NA ...
##  $ WLs...46_Date                : chr [1:12308] "34407" "34414" "34421" "34430" ...
##  $ WLs...47_MW4                 : chr [1:12308] NA NA NA NA ...
##  $ WLs...48_MW20                : chr [1:12308] NA NA NA NA ...
##  $ WLs...49_MW21                : chr [1:12308] NA NA NA NA ...
##  $ WLs...50_MW25                : chr [1:12308] "39.299999999999997" "40.299999999999997" "40.5" "41.45" ...
##  $ MW CHL_Date                  : chr [1:12308] "34337" "34407" "34435" "34456" ...
##  $ CHL...52_MW4                 : chr [1:12308] NA NA NA NA ...
##  $ CHL...53_MW20                : chr [1:12308] NA NA NA NA ...
##  $ CHL...54_MW21                : chr [1:12308] NA NA NA NA ...
##  $ CHL...55_MW25                : chr [1:12308] "116" "133" "134" "132" ...
##  $ TDS...56_MW4                 : chr [1:12308] NA NA NA NA ...
##  $ TDS...57_MW20                : chr [1:12308] NA NA NA NA ...
##  $ TDS...58_MW21                : chr [1:12308] NA NA NA NA ...
##  $ TDS...59_MW25                : chr [1:12308] NA NA NA NA ...
##  $ SO4...60_MW4                 : chr [1:12308] NA NA NA NA ...
##  $ SO4...61_MW20                : chr [1:12308] NA NA NA NA ...
##  $ SO4...62_MW21                : chr [1:12308] NA NA NA NA ...
##  $ SO4...63_MW25                : chr [1:12308] NA NA NA NA ...
##  $ Rainfall_Date                : chr [1:12308] "37164" "37195" "37256" "37287" ...
##  $ RF Total_inches              : chr [1:12308] "6.44" "11.25" "1.07" "2.42" ...
##  $ RF Avg_inches                : chr [1:12308] "0.21" "0.38" "0.03" "0.08" ...


Create functions to reproduce the figures.

# Before we get into the data, set some formatting parameters:
# Set color and feature format schemes for the plots
waterlevel_colors <- c("Water Level" = "deepskyblue1", "LOESS Water Level" = "black") 
Chl_colors<- c("Chloride Concentration" = "cornflowerblue", "LOESS Chloride Concentration" = "magenta") 

# And add a function that allows for a user-defined legend position
adjust_legendPosition <- function(x_direction, y_direction){
                                  theme(legend.position = c(x_direction, y_direction))}

1. Function to pull the full Water Level record for a given well.

getWaterLevel_FullRecord_plot <- function(WL_date, WL_measurement, insert_plot_title){ 
    # Compile the tibble in a workable format
    df %>% 
    select(all_of(WL_date), all_of(WL_measurement)) %>% #pull only selected columns with date and water level
    rename(Date = WL_date, WaterLevel = WL_measurement) %>% 
    na.omit() %>%
    mutate(WaterLevel = as.numeric(WaterLevel)) %>% 
    mutate(Date = as.numeric(Date)) %>% #R is currently reading the columns as characters. Convert values to num.
    filter(Date > 0 ) %>% #keep only the rows with valid dates
    mutate(Date = as.Date(Date, origin = "1899-12-30")) %>% #convert dates to legible format
    
    # Put the tibble into a graph
    ggplot(aes(x = Date)) +
    
    # Set x-axis units and axis/graph labels
    scale_x_date(date_breaks = "3 year",
                 labels = date_format("%b-%Y"))+
    labs(y = "Water Level (ft)",
         x = "Date",
         title = insert_plot_title) +
    
    # Add Water Level and LOESS lines
    geom_line(aes(y = WaterLevel, color = "Water Level"), #add a line plotting water level over time
              linewidth = 0.5) +
    geom_smooth(aes(y = WaterLevel, color = "LOESS Water Level"), 
                method = "loess", #apply LOWESS aka LOESS smoothing line
                span = .15, #the span, or smoothness, visually/manually matched to the original report's figure
                se = FALSE,
                linewidth = 1,
                linetype = "dashed",
                alpha = 0.6) +
    
    # Color and Legend formatting
    scale_color_manual(values = waterlevel_colors)+
    theme(
      legend.title = element_blank(),
      legend.justification = c("right", "top"),
      legend.box.just = "left",
      legend.margin = margin(6, 6, 6, 6),
      legend.box.background = element_rect(color="black", linewidth=0.5)
    )
}


2. Function to pull the full Chloride Concentration record for a given well.

getChl_FullRecord_plot <- function(Chl_date, Chl_measurement, insert_plot_title){
  # Compile the tibble in a workable format
  df %>% 
    select(all_of(Chl_date), all_of(Chl_measurement)) %>% #pull only selected columns with date and Chloride Conc.
    rename(Date = Chl_date, ChlConc = Chl_measurement) %>% 
    na.omit() %>%
    mutate(ChlConc = as.numeric(ChlConc)) %>% 
    mutate(Date = as.numeric(Date)) %>% #R is currently reading the columns as characters. Convert values to num.
    filter(Date > 0 ) %>% #keep only the rows with valid dates
    mutate(Date = as.Date(Date, origin = "1899-12-30")) %>%  #convert dates to legible format
  
    # Put the tibble into a graph
  ggplot(aes(x = Date)) +
    #Set x-axis units and axis/graph labels
    scale_x_date(date_breaks = "3 year", 
                 labels = date_format("%b-%Y"))+
    labs(y = "Chloride Concentration (mg/L)",
         x = "Date",
         title = insert_plot_title) +
  
    # Add Chloride Conc. and LOESS lines  
  geom_point(aes(y = ChlConc, fill = "Chloride Concentration"), #add a line plotting Chl Conc. over time
             shape=23, color="black", stroke = .5, size=1.2) +
  geom_smooth(aes(y = ChlConc, color = "LOESS Chloride Concentration"), 
              method = "loess", #apply LOWESS aka LOESS smoothing line
              span = 0.07, #the span, or smoothness, visually/manually matched to the original report's figure
              se = FALSE,
              linewidth = 1,
              alpha = 0.6) +
  
  # Color and Legend formatting
  scale_color_manual(values = Chl_colors)+
  scale_fill_manual(values = "cornflowerblue") +
  theme(
    legend.title = element_blank(),
    legend.justification = c("right", "top"),
    legend.box.just = "left",
    legend.margin = margin(6, 6, 6, 6),
    legend.box.background = element_rect(color="black", linewidth=0.5)
  )
}


3. Function to pull the Water Level record for a given well, during a user-defined timespan.

# 5-YR RECORD
getWaterLevel_bySetTime_plot <- function(WL_date, WL_measurement, 
                                         insert_plot_title,
                                         start_Date, end_Date){ 
  # Compile the tibble in a workable format
  df %>% 
    select(all_of(WL_date), all_of(WL_measurement)) %>% #pull only selected columns with date and water level
    rename(Date = WL_date, WaterLevel = WL_measurement) %>% 
    na.omit() %>%
    mutate(WaterLevel = as.numeric(WaterLevel)) %>% 
    mutate(Date = as.numeric(Date)) %>% #R is currently reading the columns as characters. Convert values to num.
    filter(Date > 0 ) %>% #keep only the rows with valid dates
    mutate(Date = as.Date(Date, origin = "1899-12-30")) %>% #convert dates to legible format
    
    # Put the tibble into a graph
    ggplot(aes(x = Date)) +
    
    #Set x-axis units and axis/graph labels
    scale_x_date(date_breaks = "1 year", 
                 labels = date_format("%b-%Y"),
                 limits = as.Date(c(start_Date, end_Date)))+
    labs(y = "Water Level (ft)",
         x = "Date",
         title = insert_plot_title) +
    
    # Add Water Level and LOESS lines
    geom_line(aes(y = WaterLevel, color = "Water Level"), #add a line plotting water level over time
              linewidth = 0.5) +
    geom_smooth(aes(y = WaterLevel, color = "LOESS Water Level"), 
                method = "loess", #apply LOWESS aka LOESS smoothing line
                span = .35, #the span, or smoothness, visually/manually matched to the original report's figure
                se = FALSE,
                linewidth = 1,
                linetype = "dashed",
                alpha = 0.6) +
    
    # Color and Legend formatting
    scale_color_manual(values = waterlevel_colors)+
    theme(
      legend.title = element_blank(),
      legend.justification = c("right", "top"),
      legend.box.just = "left",
      legend.margin = margin(6, 6, 6, 6),
      legend.box.background = element_rect(color="black", linewidth=0.5)
    )
}


4. Function to pull the Chloride Concentration record for a given well, during a user-defined timespan.

getChl_bySetTime_plot <- function(Chl_date, Chl_measurement, 
                                  insert_plot_title,
                                  start_Date, end_Date){
  # Compile the tibble in a workable format
  df %>% 
    select(all_of(Chl_date), all_of(Chl_measurement)) %>% #pull only selected columns with date and Chloride Conc.
    rename(Date = Chl_date, ChlConc = Chl_measurement) %>% 
    na.omit() %>%
    mutate(ChlConc = as.numeric(ChlConc)) %>% 
    mutate(Date = as.numeric(Date)) %>% #R is currently reading the columns as characters. Convert values to num.
    filter(Date > 0 ) %>% #keep only the rows with valid dates
    mutate(Date = as.Date(Date, origin = "1899-12-30")) %>%  #convert dates to legible format
    
    # Put the tibble into a graph
    ggplot(aes(x = Date)) +
    
    #Set x-axis units and axis/graph labels
    scale_x_date(date_breaks = "1 year", 
                 labels = date_format("%b-%Y"),
                 limits = as.Date(c(start_Date, end_Date)))+
    labs(y = "Chloride Concentration (mg/L)",
         x = "Date",
         title = insert_plot_title) +
    
    # Add Chloride Conc. and LOESS lines
    geom_point(aes(y = ChlConc, fill = "Chloride Concentration"), #add a line plotting Chl Conc. over time
               shape=23, color="black", stroke = .5, size=1.2) +
    geom_smooth(aes(y = ChlConc, color = "LOESS Chloride Concentration"), 
                method = "loess", #apply LOWESS aka LOESS smoothing line
                span = 0.13, #the span, or smoothness, visually/manually matched to the original report's figure
                se = FALSE,
                linewidth = 1,
                alpha = 0.6) +
    
    # Color and Legend formatting
    scale_color_manual(values = Chl_colors)+
    scale_fill_manual(values = "cornflowerblue") +
    theme(
      legend.title = element_blank(),
      legend.justification = c("right", "top"),
      legend.box.just = "left",
      legend.margin = margin(6, 6, 6, 6),
      legend.box.background = element_rect(color="black", linewidth=0.5)
    )
}


Recreating “Figure 3.1 Period of Record - Monitor Well 4 (4S)”

To do so, we need to isolate two tables. Reference the section above where we ran the “str()” function to identify the column names.


  1. Water Level vs. Date, i.e. columns “WLs…46_Date” and “WLs…47_MW4”. Specify an appropriate title for the graph according to the function. The “adjust_legendPosition” function can be removed entirely, or adjusted after initial plotting to determine the optimal location for each graph.
MW4_waterlevel_FullRecord_plot <- getWaterLevel_FullRecord_plot("WLs...46_Date", "WLs...47_MW4", 
                                      "Figure 3.1  Period of Record - Monitor Well 4 (4S)")+
                                  adjust_legendPosition(0.35, 0.95)
MW4_waterlevel_FullRecord_plot
## `geom_smooth()` using formula = 'y ~ x'


  1. Cl Concentration vs. time, i.e. columns “MW CHL_Date” and “CHL…52_MW4”. Specify an appropriate title for the graph according to the function. As before, the “adjust_legendPosition” function can be removed entirely or adjusted after initial plotting.
MW4_Chl_FullRecord_plot <- getChl_FullRecord_plot("MW CHL_Date", "CHL...52_MW4", 
                                                  "Figure 3.1  Period of Record - Monitor Well 4 (4S)")+
                           adjust_legendPosition(0.35, 0.95)
MW4_Chl_FullRecord_plot
## `geom_smooth()` using formula = 'y ~ x'


Recreating “Figure 3.2 Five-Year Period - Monitor Well 4 (4S)”

To do so, we need to do the same as above, but also specify the date range. In this case it is Jan 2018-Jan 2023, though the function will accept any appropriate date range that is entered.


  1. Water level vs. time, i.e. columns “WLs…46_Date” and “WLs…47_MW4”. Specify an appropriate title and date range for the graph. As before, the “adjust_legendPosition” function can be removed entirely or adjusted after initial plotting.
MW4_waterlevel_5yr_plot <- getWaterLevel_bySetTime_plot("WLs...46_Date", "WLs...47_MW4", 
                                                   "Figure 3.2 Five-Year Period - Monitor Well 4 (4S)",
                                                   '2018-01-01', '2023-01-01') +
                           adjust_legendPosition(0.35, 0.95)
MW4_waterlevel_5yr_plot
## `geom_smooth()` using formula = 'y ~ x'


  1. Cl Concentration vs. time, i.e. columns “MW CHL_Date” and “CHL…52_MW4”. Specify an appropriate title and date range for the graph. As before, the “adjust_legendPosition” function can be removed entirely or adjusted after initial plotting.
MW4_Chl_5yr_plot <- getChl_bySetTime_plot("MW CHL_Date", "CHL...52_MW4", 
                                      "Figure 3.2 Five-Year Period - Monitor Well 4 (4S)",
                                      '2018-01-01', '2023-01-01') +
                    adjust_legendPosition(0.35, 0.98)
MW4_Chl_5yr_plot
## `geom_smooth()` using formula = 'y ~ x'


These functions can of course be applied to the other Monitoring Wells. For example:

Monitoring Well 20

MW20_waterlevel_FullRecord_plot <- 
            getWaterLevel_FullRecord_plot("WLs...46_Date", "WLs...48_MW20", 
                                          "Period of Record - Monitor Well 20") +
            adjust_legendPosition(0.45, 0.95)


MW20_Chl_FullRecord_plot <- 
            getChl_FullRecord_plot("MW CHL_Date", "CHL...53_MW20", 
                                   "Period of Record - Monitor Well 20") +
            adjust_legendPosition(0.35, 0.95)


MW20_waterlevel_5yr_plot <- 
            getWaterLevel_bySetTime_plot("WLs...46_Date", "WLs...48_MW20", 
                                         "Five-Year Period - Monitor Well 20",
                                         '2018-01-01', '2023-01-01') +
            adjust_legendPosition(0.35, 0.95)


MW20_Chl_5yr_plot <- 
            getChl_bySetTime_plot("MW CHL_Date", "CHL...53_MW20", 
                                  "Five-Year Period - Monitor Well 20",
                                  '2018-01-01', '2023-01-01') +
            adjust_legendPosition(0.35, 0.45)


MW20_waterlevel_FullRecord_plot
## `geom_smooth()` using formula = 'y ~ x'

MW20_Chl_FullRecord_plot
## `geom_smooth()` using formula = 'y ~ x'

MW20_waterlevel_5yr_plot
## `geom_smooth()` using formula = 'y ~ x'

MW20_Chl_5yr_plot
## `geom_smooth()` using formula = 'y ~ x'


Monitoring Well 21

MW21_waterlevel_FullRecord_plot <- 
            getWaterLevel_FullRecord_plot("WLs...46_Date", "WLs...49_MW21", 
                                          "Period of Record - Monitor Well 21") +
            adjust_legendPosition(0.7, 0.99)


MW21_Chl_FullRecord_plot <- 
            getChl_FullRecord_plot("MW CHL_Date", "CHL...54_MW21", 
                                   "Period of Record - Monitor Well 21") +
            adjust_legendPosition(0.5, 0.95)


MW21_waterlevel_5yr_plot <- 
            getWaterLevel_bySetTime_plot("WLs...46_Date", "WLs...49_MW21", 
                                         "Five-Year Period - Monitor Well 21",
                                         '2018-01-01', '2023-01-01') +
            adjust_legendPosition(0.4, 0.95)


MW21_Chl_5yr_plot <- 
            getChl_bySetTime_plot("MW CHL_Date", "CHL...54_MW21", 
                                  "Five-Year Period - Monitor Well 21",
                                  '2018-01-01', '2023-01-01') +
            adjust_legendPosition(0.95, 0.95)


MW21_waterlevel_FullRecord_plot
## `geom_smooth()` using formula = 'y ~ x'

MW21_Chl_FullRecord_plot
## `geom_smooth()` using formula = 'y ~ x'

MW21_waterlevel_5yr_plot
## `geom_smooth()` using formula = 'y ~ x'

MW21_Chl_5yr_plot
## `geom_smooth()` using formula = 'y ~ x'


Monitoring Well 25

MW25_waterlevel_FullRecord_plot <- 
  getWaterLevel_FullRecord_plot("WLs...46_Date", "WLs...50_MW25", 
                                "Period of Record - Monitor Well 25") +
                           adjust_legendPosition(0.55, 0.95)

MW25_Chl_FullRecord_plot <- getChl_FullRecord_plot("MW CHL_Date", "CHL...55_MW25", 
                                                   "Period of Record - Monitor Well 25") +
                            adjust_legendPosition(0.35, 0.95)


MW25_waterlevel_5yr_plot <- getWaterLevel_bySetTime_plot("WLs...46_Date", "WLs...50_MW25", 
                                                   "Five-Year Period - Monitor Well 25",
                                                   '2018-01-01', '2023-01-01') +
                            adjust_legendPosition(0.35, 0.95)


MW25_Chl_5yr_plot <- getChl_bySetTime_plot("MW CHL_Date", "CHL...55_MW25", 
                                      "Five-Year Period - Monitor Well 25",
                                      '2018-01-01', '2023-01-01') +
                     adjust_legendPosition(0.95, 0.35)


MW25_waterlevel_FullRecord_plot
## `geom_smooth()` using formula = 'y ~ x'

MW25_Chl_FullRecord_plot
## `geom_smooth()` using formula = 'y ~ x'

MW25_waterlevel_5yr_plot
## `geom_smooth()` using formula = 'y ~ x'

MW25_Chl_5yr_plot
## `geom_smooth()` using formula = 'y ~ x'


Calculating Kendall tau-b for the Period of Record

Compile a tibble for each Monitoring Well, containing just the Date and the Monitoring Well’s Chloride readings.

K_tau_b_tibble_MW4 <- df %>% 
  select(all_of("MW CHL_Date"), all_of("CHL...52_MW4")) %>%   #pull only certain columns
  rename(NumDate = "MW CHL_Date") %>% 
  mutate(NumDate = as.numeric(NumDate)) %>%  #R is currently reading the columns as characters. Convert values to num. This is the Excel numerical date format.
  rename(Chl_MW4 = "CHL...52_MW4") %>% 
  mutate(Chl_MW4 = as.numeric(Chl_MW4)) %>% 
  filter(!is.na(Chl_MW4)) #Remove all NA values

K_tau_b_tibble_MW20 <- df %>% 
  select(all_of("MW CHL_Date"), all_of("CHL...53_MW20")) %>%   #pull only certain columns
  rename(NumDate = "MW CHL_Date") %>% 
  mutate(NumDate = as.numeric(NumDate)) %>%  #R is currently reading the columns as characters. Convert values to num. This is the Excel numerical date format.
  rename(Chl_MW20 = "CHL...53_MW20") %>% 
  mutate(Chl_MW20 = as.numeric(Chl_MW20)) %>%
  filter(!is.na(Chl_MW20)) #Remove all NA values

K_tau_b_tibble_MW21 <- df %>% 
  select(all_of("MW CHL_Date"), all_of("CHL...54_MW21")) %>%   #pull only certain columns
  rename(NumDate = "MW CHL_Date") %>% 
  mutate(NumDate = as.numeric(NumDate)) %>%  #R is currently reading the columns as characters. Convert values to num. This is the Excel numerical date format.
  rename(Chl_MW21 = "CHL...54_MW21") %>% 
  mutate(Chl_MW21 = as.numeric(Chl_MW21)) %>%
  filter(!is.na(Chl_MW21)) #Remove all NA values

K_tau_b_tibble_MW25 <- df %>% 
  select(all_of("MW CHL_Date"), all_of("CHL...55_MW25")) %>%   #pull only certain columns
  rename(NumDate = "MW CHL_Date") %>% 
  mutate(NumDate = as.numeric(NumDate)) %>%  #R is currently reading the columns as characters. Convert values to num. This is the Excel numerical date format.
  rename(Chl_MW25 = "CHL...55_MW25") %>% 
  mutate(Chl_MW25 = as.numeric(Chl_MW25)) %>% 
  filter(!is.na(Chl_MW25)) #Remove all NA values

# Also make a tibble for Date alone
K_tau_b_tibble_Date <- df %>% 
  select(all_of("MW CHL_Date")) %>%   #pull only certain columns
  rename(NumDate = "MW CHL_Date") %>% 
  mutate(NumDate = as.numeric(NumDate)) %>%  #R is currently reading the columns as characters. Convert values to num. This is the Excel numerical date format.
  filter(!is.na(NumDate)) #Remove all NA values


Calculate Kendall tau-b for each Monitoring Well and the Date, and isolate the desired values in the appropriate formats.

# Calculate Kendall tau-b for Monitoring Well 4
Ktau_b_MW4 <- cor.test(K_tau_b_tibble_MW4$Chl_MW4, K_tau_b_tibble_MW4$NumDate, method = "kendall", exact = FALSE)
CorrCoeff_MW4 <- format(round(as.numeric(Ktau_b_MW4$estimate), 3), nsmall = 3)
Sig_MW4 <- formatC(Ktau_b_MW4$p.value, format = "f", digits = 3)
N_MW4 <- as.character(nrow(K_tau_b_tibble_MW4))

# Calculate Kendall tau-b for Monitoring Well 20
Ktau_b_MW20 <- cor.test(K_tau_b_tibble_MW20$Chl_MW20, K_tau_b_tibble_MW20$NumDate, method = "kendall", exact = FALSE)
CorrCoeff_MW20 <- format(round(as.numeric(Ktau_b_MW20$estimate), 3), nsmall = 3)
Sig_MW20 <- formatC(Ktau_b_MW20$p.value, format = "f", digits = 3)
N_MW20 <- as.character(nrow(K_tau_b_tibble_MW20))

# Calculate Kendall tau-b for Monitoring Well 21
Ktau_b_MW21 <- cor.test(K_tau_b_tibble_MW21$Chl_MW21, K_tau_b_tibble_MW21$NumDate, method = "kendall", exact = FALSE)
CorrCoeff_MW21 <- format(round(as.numeric(Ktau_b_MW21$estimate), 3), nsmall = 3)
Sig_MW21 <- formatC(Ktau_b_MW21$p.value, format = "f", digits = 3)
N_MW21 <- as.character(nrow(K_tau_b_tibble_MW21))

# Calculate Kendall tau-b for Monitoring Well 25
Ktau_b_MW25 <- cor.test(K_tau_b_tibble_MW25$Chl_MW25, K_tau_b_tibble_MW25$NumDate, method = "kendall", exact = FALSE)
CorrCoeff_MW25 <- format(round(as.numeric(Ktau_b_MW25$estimate), 3), nsmall = 3)
Sig_MW25 <- formatC(Ktau_b_MW25$p.value, format = "f", digits = 3)
N_MW25 <- as.character(nrow(K_tau_b_tibble_MW25))

# Calculate Kendall tau-b for the Date
Ktau_b_Date <- cor.test(K_tau_b_tibble_Date$NumDate, K_tau_b_tibble_Date$NumDate, method = "kendall", exact = FALSE)
CorrCoeff_Date <- format(round(as.numeric(Ktau_b_Date$estimate), 3), nsmall = 3)
Sig_Date <- formatC(Ktau_b_Date$p.value, format = "f", digits = 3)
N_Date <- as.character(nrow(K_tau_b_tibble_Date))


Pull the results together into a table format.

Correlation_Coeff_all <- c(CorrCoeff_Date, CorrCoeff_MW4, CorrCoeff_MW20, CorrCoeff_MW21, CorrCoeff_MW25) #compile all results for the Correlation Coefficient
Sig_all <- c(Sig_Date, Sig_MW4, Sig_MW20, Sig_MW21, Sig_MW25) #compile all results for Sig.
N_all <- c(N_Date, N_MW4, N_MW20, N_MW21, N_MW25) #compile all results for N

Final_Ktau_b <- rbind(Correlation_Coeff_all, Sig_all, N_all)
rownames(Final_Ktau_b) <- c("Correlation Coffecient", "Sig.", "N") #change the row names to match
colnames(Final_Ktau_b) <- c("Date", "MW4", "MW20", "MW21", "MW25") #change the column names to match
print(Final_Ktau_b)
##                        Date    MW4     MW20    MW21     MW25   
## Correlation Coffecient "1.000" "0.165" "0.690" "-0.301" "0.487"
## Sig.                   "0.000" "0.000" "0.000" "0.000"  "0.000"
## N                      "360"   "322"   "322"   "321"    "347"

Repeat for 2018-2022

#Filter all tibbles for the 2018-2022 Date range
K_tau_b_tibble_Date_2018to22 <- df %>% 
  select(all_of("MW CHL_Date")) %>%   #pull only certain columns
  rename(NumDate = "MW CHL_Date") %>%
  mutate(NumDate = as.numeric(NumDate)) %>%  #R is currently reading the columns as characters. Convert values to num. This is the Excel numerical date format.
  mutate(ProperDate = as.Date(NumDate, origin = "1899-12-30")) %>% #Make a new column with the "ProperDate". 
  filter(ProperDate > "2018-01-01" & ProperDate < "2022-12-31") %>% #Filter for readings from 2018 to 2022
  filter(!is.na(NumDate)) #Remove all NA values

K_tau_b_tibble_MW4_2018to22 <- df %>% 
  select(all_of("MW CHL_Date"), all_of("CHL...52_MW4")) %>%   #pull only certain columns
  rename(NumDate = "MW CHL_Date") %>% 
  mutate(NumDate = as.numeric(NumDate)) %>%  #R is currently reading the columns as characters. Convert values to num. This is the Excel numerical date format.
  mutate(ProperDate = as.Date(NumDate, origin = "1899-12-30")) %>% #Make a new column with the "ProperDate". 
  filter(ProperDate > "2018-01-01" & ProperDate < "2022-12-31") %>% #Filter for readings from 2018 to 2022
  rename(Chl_MW4 = "CHL...52_MW4") %>% 
  mutate(Chl_MW4 = as.numeric(Chl_MW4)) %>% 
  filter(!is.na(Chl_MW4)) #Remove all NA values

K_tau_b_tibble_MW20_2018to22 <- df %>% 
  select(all_of("MW CHL_Date"), all_of("CHL...53_MW20")) %>%   #pull only certain columns
  rename(NumDate = "MW CHL_Date") %>% 
  mutate(NumDate = as.numeric(NumDate)) %>%  #R is currently reading the columns as characters. Convert values to num. This is the Excel numerical date format.
  mutate(ProperDate = as.Date(NumDate, origin = "1899-12-30")) %>% #Make a new column with the "ProperDate". 
  filter(ProperDate > "2018-01-01" & ProperDate < "2022-12-31") %>% #Filter for readings from 2018 to 2022
  rename(Chl_MW20 = "CHL...53_MW20") %>% 
  mutate(Chl_MW20 = as.numeric(Chl_MW20)) %>%
  filter(!is.na(Chl_MW20)) #Remove all NA values

K_tau_b_tibble_MW21_2018to22 <- df %>% 
  select(all_of("MW CHL_Date"), all_of("CHL...54_MW21")) %>%   #pull only certain columns
  rename(NumDate = "MW CHL_Date") %>% 
  mutate(NumDate = as.numeric(NumDate)) %>%  #R is currently reading the columns as characters. Convert values to num. This is the Excel numerical date format.
  mutate(ProperDate = as.Date(NumDate, origin = "1899-12-30")) %>% #Make a new column with the "ProperDate". 
  filter(ProperDate > "2018-01-01" & ProperDate < "2022-12-31") %>% #Filter for readings from 2018 to 2022
  rename(Chl_MW21 = "CHL...54_MW21") %>% 
  mutate(Chl_MW21 = as.numeric(Chl_MW21)) %>%
  filter(!is.na(Chl_MW21)) #Remove all NA values

K_tau_b_tibble_MW25_2018to22 <- df %>% 
  select(all_of("MW CHL_Date"), all_of("CHL...55_MW25")) %>%   #pull only certain columns
  rename(NumDate = "MW CHL_Date") %>% 
  mutate(NumDate = as.numeric(NumDate)) %>%  #R is currently reading the columns as characters. Convert values to num. This is the Excel numerical date format.
  mutate(ProperDate = as.Date(NumDate, origin = "1899-12-30")) %>% #Make a new column with the "ProperDate". 
  filter(ProperDate > "2018-01-01" & ProperDate < "2022-12-31") %>% #Filter for readings from 2018 to 2022
  rename(Chl_MW25 = "CHL...55_MW25") %>% 
  mutate(Chl_MW25 = as.numeric(Chl_MW25)) %>% 
  filter(!is.na(Chl_MW25)) #Remove all NA values


# Calculate Kendall tau-b
Ktau_b_Date_2018to22 <- cor.test(K_tau_b_tibble_Date_2018to22$NumDate, K_tau_b_tibble_Date_2018to22$NumDate, method = "kendall", exact = FALSE)
CorrCoeff_Date_2018to22 <- format(round(as.numeric(Ktau_b_Date_2018to22$estimate), 3), nsmall = 3)
Sig_Date_2018to22 <- formatC(Ktau_b_Date_2018to22$p.value, format = "f", digits = 3)
N_Date_2018to22 <- as.character(nrow(K_tau_b_tibble_Date_2018to22))

Ktau_b_MW4_2018to22 <- cor.test(K_tau_b_tibble_MW4_2018to22$Chl_MW4, K_tau_b_tibble_MW4_2018to22$NumDate, method = "kendall", exact = FALSE)
CorrCoeff_MW4_2018to22 <- format(round(as.numeric(Ktau_b_MW4_2018to22$estimate), 3), nsmall = 3)
Sig_MW4_2018to22 <- formatC(Ktau_b_MW4_2018to22$p.value, format = "f", digits = 3)
N_MW4_2018to22 <- as.character(nrow(K_tau_b_tibble_MW4_2018to22))

Ktau_b_MW20_2018to22 <- cor.test(K_tau_b_tibble_MW20_2018to22$Chl_MW20, K_tau_b_tibble_MW20_2018to22$NumDate, method = "kendall", exact = FALSE)
CorrCoeff_MW20_2018to22 <- format(round(as.numeric(Ktau_b_MW20_2018to22$estimate), 3), nsmall = 3)
Sig_MW20_2018to22 <- formatC(Ktau_b_MW20_2018to22$p.value, format = "f", digits = 3)
N_MW20_2018to22 <- as.character(nrow(K_tau_b_tibble_MW20_2018to22))

Ktau_b_MW21_2018to22 <- cor.test(K_tau_b_tibble_MW21_2018to22$Chl_MW21, K_tau_b_tibble_MW21_2018to22$NumDate, method = "kendall", exact = FALSE)
CorrCoeff_MW21_2018to22 <- format(round(as.numeric(Ktau_b_MW21_2018to22$estimate), 3), nsmall = 3)
Sig_MW21_2018to22 <- formatC(Ktau_b_MW21_2018to22$p.value, format = "f", digits = 3)
N_MW21_2018to22 <- as.character(nrow(K_tau_b_tibble_MW21_2018to22))

Ktau_b_MW25_2018to22 <- cor.test(K_tau_b_tibble_MW25_2018to22$Chl_MW25, K_tau_b_tibble_MW25_2018to22$NumDate, method = "kendall", exact = FALSE)
CorrCoeff_MW25_2018to22 <- format(round(as.numeric(Ktau_b_MW25_2018to22$estimate), 3), nsmall = 3)
Sig_MW25_2018to22 <- formatC(Ktau_b_MW25_2018to22$p.value, format = "f", digits = 3)
N_MW25_2018to22 <- as.character(nrow(K_tau_b_tibble_MW25_2018to22))

#Compile the results
Correlation_Coeff_all_2018to22 <- c(CorrCoeff_Date_2018to22, CorrCoeff_MW4_2018to22, CorrCoeff_MW20_2018to22, CorrCoeff_MW21_2018to22, CorrCoeff_MW25_2018to22)
Sig_all_2018to22 <- c(Sig_Date_2018to22, Sig_MW4_2018to22, Sig_MW20_2018to22, Sig_MW21_2018to22, Sig_MW25_2018to22)
N_all_2018to22 <- c(N_Date_2018to22, N_MW4_2018to22, N_MW20_2018to22, N_MW21_2018to22, N_MW25_2018to22)

Final_Ktau_b_2018to22 <- rbind(Correlation_Coeff_all_2018to22, Sig_all_2018to22, N_all_2018to22)
rownames(Final_Ktau_b_2018to22) <- c("Correlation Coffecient", "Sig.", "N") #change the row names to match
colnames(Final_Ktau_b_2018to22) <- c("Date", "MW4", "MW20", "MW21", "MW25") #change the column names to match
print(Final_Ktau_b_2018to22)
##                        Date    MW4     MW20    MW21    MW25    
## Correlation Coffecient "1.000" "0.258" "0.497" "0.352" "-0.379"
## Sig.                   "0.000" "0.004" "0.000" "0.000" "0.000" 
## N                      "62"    "59"    "60"    "60"    "60"

Format into nicer tables.

Final_Ktau_b %>%
  kbl(caption = "Chloride vs. Time Nonparametric Correlations - Kendall tau-b, Monitoring Wells, Period of Record") %>%
  kable_classic(full_width = F)
Chloride vs. Time Nonparametric Correlations - Kendall tau-b, Monitoring Wells, Period of Record
Date MW4 MW20 MW21 MW25
Correlation Coffecient 1.000 0.165 0.690 -0.301 0.487
Sig. 0.000 0.000 0.000 0.000 0.000
N 360 322 322 321 347
Final_Ktau_b_2018to22 %>%
  kbl(caption = "Chloride vs. Time Nonparametric Correlations - Kendall tau-b, Monitoring Wells, 2018-2022") %>%
  kable_classic(full_width = F)
Chloride vs. Time Nonparametric Correlations - Kendall tau-b, Monitoring Wells, 2018-2022
Date MW4 MW20 MW21 MW25
Correlation Coffecient 1.000 0.258 0.497 0.352 -0.379
Sig. 0.000 0.004 0.000 0.000 0.000
N 62 59 60 60 60