Prepare the original Excel Dataset so that we can extract the
necessary tibbles for analysis and visualization.
# Import starting Excel data
df <- read_excel("~/Documents/Hunter College/DataVisR/Week_11-15/Data/AF210121 MAIN DATABASE WQ & PUMP_WY2023.xlsx", sheet = "Data", col_types = "text")
# Edit the column names so that they occupy one row
names(df) <- paste(names(df), df[1, ], sep = "_") #merge row 1 and 2, separated by "_"
df <- df[-1,] #remove the 2nd, now empty, row
colnames(df)[1] <- "Date" #rename the first column "Date"
Examine the structure of “df” aka Tab 1 of the original spreadsheet,
containing all the data. In later sections, we will use this to identify
the column names needed to make the desired plots.
str(df)
## tibble [12,308 × 66] (S3: tbl_df/tbl/data.frame)
## $ Date : chr [1:12308] "28914" "28945" "28975" "29006" ...
## $ Wells
## 2 &5_CL Limit : chr [1:12308] NA NA NA NA ...
## $ Well 3_CL Limit : chr [1:12308] NA NA NA NA ...
## $ Well 5_CL Limit : chr [1:12308] NA NA NA NA ...
## $ Well 6_CL Limit : chr [1:12308] NA NA NA NA ...
## $ Well 7_CL Limit : chr [1:12308] NA NA NA NA ...
## $ Well 9_CL Limit : chr [1:12308] NA NA NA NA ...
## $ Well 10_CL Limit : chr [1:12308] NA NA NA NA ...
## $ Well 2 CL_(mg/L) : chr [1:12308] "44" "40" "52" "44" ...
## $ Well 3 CL_(mg/L) : chr [1:12308] "40" "40" "40" "44" ...
## $ Well 5 CL_(mg/L) : chr [1:12308] NA NA NA "60" ...
## $ Well 6 CL_(mg/L) : chr [1:12308] NA "52" "60" "48" ...
## $ Well 7 CL_(mg/L) : chr [1:12308] NA NA NA NA ...
## $ Well 9 CL_(mg/L) : chr [1:12308] NA NA NA NA ...
## $ Well 10 CL_(mg/L) : chr [1:12308] NA NA NA NA ...
## $ Well 2 TDS_(mg/L) : chr [1:12308] NA NA NA NA ...
## $ Well 3 TDS_(mg/L) : chr [1:12308] NA NA NA NA ...
## $ Well 5 TDS_(mg/L) : chr [1:12308] NA NA NA NA ...
## $ Well 6 TDS_(mg/L) : chr [1:12308] NA NA NA NA ...
## $ Well 7 TDS_(mg/L) : chr [1:12308] NA NA NA NA ...
## $ Well 9 TDS_(mg/L) : chr [1:12308] NA NA NA NA ...
## $ Well 10 TDS_(mg/L) : chr [1:12308] NA NA NA NA ...
## $ Well 2 SO4_(mg/L) : chr [1:12308] NA NA NA NA ...
## $ Well 3 SO4_(mg/L) : chr [1:12308] NA NA NA NA ...
## $ Well 5 SO4_(mg/L) : chr [1:12308] NA NA NA NA ...
## $ Well 6 SO4_(mg/L) : chr [1:12308] NA NA NA NA ...
## $ Well 7 SO4_(mg/L) : chr [1:12308] NA NA NA NA ...
## $ Well 9 SO4_(mg/L) : chr [1:12308] NA NA NA NA ...
## $ Well 10 SO4_(mg/L) : chr [1:12308] NA NA NA NA ...
## $ ...30_NA : chr [1:12308] NA NA NA NA ...
## $ ...31_NA : chr [1:12308] NA NA NA NA ...
## $ DATE_(Daily) : chr [1:12308] "32874" "32875" "32876" "32877" ...
## $ Well 2 PUMP_(GPD) : chr [1:12308] "270300" "291300" "261900" "274400" ...
## $ Well 3 PUMP_(GPD) : chr [1:12308] "180600" "192300" "170200" "183000" ...
## $ Well 4 PUMP_(GPD) : chr [1:12308] NA NA "112300" "1600" ...
## $ Well 5 PUMP_(GPD) : chr [1:12308] "335800" "363100" "319600" "342600" ...
## $ Well 6 PUMP_(GPD) : chr [1:12308] "0" "0" "0" "0" ...
## $ Well 7 PUMP_(GPD) : chr [1:12308] NA NA NA NA ...
## $ Well 9 PUMP_(GPD) : chr [1:12308] NA NA NA NA ...
## $ Well 10 PUMP_(GPD) : chr [1:12308] NA NA NA NA ...
## $ All Wells Daily_(GPD) : chr [1:12308] "786700" "846700" "864000" "801600" ...
## $ Date_Monthly : chr [1:12308] "32904" "32932" "32963" "32993" ...
## $ All Wells Monthly_(Gal/Month): chr [1:12308] "26062300" "24418749" "33141500" "31449500" ...
## $ ...44_NA : chr [1:12308] NA NA NA NA ...
## $ ...45_NA : chr [1:12308] NA NA NA NA ...
## $ WLs...46_Date : chr [1:12308] "34407" "34414" "34421" "34430" ...
## $ WLs...47_MW4 : chr [1:12308] NA NA NA NA ...
## $ WLs...48_MW20 : chr [1:12308] NA NA NA NA ...
## $ WLs...49_MW21 : chr [1:12308] NA NA NA NA ...
## $ WLs...50_MW25 : chr [1:12308] "39.299999999999997" "40.299999999999997" "40.5" "41.45" ...
## $ MW CHL_Date : chr [1:12308] "34337" "34407" "34435" "34456" ...
## $ CHL...52_MW4 : chr [1:12308] NA NA NA NA ...
## $ CHL...53_MW20 : chr [1:12308] NA NA NA NA ...
## $ CHL...54_MW21 : chr [1:12308] NA NA NA NA ...
## $ CHL...55_MW25 : chr [1:12308] "116" "133" "134" "132" ...
## $ TDS...56_MW4 : chr [1:12308] NA NA NA NA ...
## $ TDS...57_MW20 : chr [1:12308] NA NA NA NA ...
## $ TDS...58_MW21 : chr [1:12308] NA NA NA NA ...
## $ TDS...59_MW25 : chr [1:12308] NA NA NA NA ...
## $ SO4...60_MW4 : chr [1:12308] NA NA NA NA ...
## $ SO4...61_MW20 : chr [1:12308] NA NA NA NA ...
## $ SO4...62_MW21 : chr [1:12308] NA NA NA NA ...
## $ SO4...63_MW25 : chr [1:12308] NA NA NA NA ...
## $ Rainfall_Date : chr [1:12308] "37164" "37195" "37256" "37287" ...
## $ RF Total_inches : chr [1:12308] "6.44" "11.25" "1.07" "2.42" ...
## $ RF Avg_inches : chr [1:12308] "0.21" "0.38" "0.03" "0.08" ...
Create functions to reproduce the figures.
# Before we get into the data, set some formatting parameters:
# Set color and feature format schemes for the plots
waterlevel_colors <- c("Water Level" = "deepskyblue1", "LOESS Water Level" = "black")
Chl_colors<- c("Chloride Concentration" = "cornflowerblue", "LOESS Chloride Concentration" = "magenta")
# And add a function that allows for a user-defined legend position
adjust_legendPosition <- function(x_direction, y_direction){
theme(legend.position = c(x_direction, y_direction))}
1. Function to pull the full Water Level record for a given
well.
getWaterLevel_FullRecord_plot <- function(WL_date, WL_measurement, insert_plot_title){
# Compile the tibble in a workable format
df %>%
select(all_of(WL_date), all_of(WL_measurement)) %>% #pull only selected columns with date and water level
rename(Date = WL_date, WaterLevel = WL_measurement) %>%
na.omit() %>%
mutate(WaterLevel = as.numeric(WaterLevel)) %>%
mutate(Date = as.numeric(Date)) %>% #R is currently reading the columns as characters. Convert values to num.
filter(Date > 0 ) %>% #keep only the rows with valid dates
mutate(Date = as.Date(Date, origin = "1899-12-30")) %>% #convert dates to legible format
# Put the tibble into a graph
ggplot(aes(x = Date)) +
# Set x-axis units and axis/graph labels
scale_x_date(date_breaks = "3 year",
labels = date_format("%b-%Y"))+
labs(y = "Water Level (ft)",
x = "Date",
title = insert_plot_title) +
# Add Water Level and LOESS lines
geom_line(aes(y = WaterLevel, color = "Water Level"), #add a line plotting water level over time
linewidth = 0.5) +
geom_smooth(aes(y = WaterLevel, color = "LOESS Water Level"),
method = "loess", #apply LOWESS aka LOESS smoothing line
span = .15, #the span, or smoothness, visually/manually matched to the original report's figure
se = FALSE,
linewidth = 1,
linetype = "dashed",
alpha = 0.6) +
# Color and Legend formatting
scale_color_manual(values = waterlevel_colors)+
theme(
legend.title = element_blank(),
legend.justification = c("right", "top"),
legend.box.just = "left",
legend.margin = margin(6, 6, 6, 6),
legend.box.background = element_rect(color="black", linewidth=0.5)
)
}
2. Function to pull the full Chloride Concentration record for a
given well.
getChl_FullRecord_plot <- function(Chl_date, Chl_measurement, insert_plot_title){
# Compile the tibble in a workable format
df %>%
select(all_of(Chl_date), all_of(Chl_measurement)) %>% #pull only selected columns with date and Chloride Conc.
rename(Date = Chl_date, ChlConc = Chl_measurement) %>%
na.omit() %>%
mutate(ChlConc = as.numeric(ChlConc)) %>%
mutate(Date = as.numeric(Date)) %>% #R is currently reading the columns as characters. Convert values to num.
filter(Date > 0 ) %>% #keep only the rows with valid dates
mutate(Date = as.Date(Date, origin = "1899-12-30")) %>% #convert dates to legible format
# Put the tibble into a graph
ggplot(aes(x = Date)) +
#Set x-axis units and axis/graph labels
scale_x_date(date_breaks = "3 year",
labels = date_format("%b-%Y"))+
labs(y = "Chloride Concentration (mg/L)",
x = "Date",
title = insert_plot_title) +
# Add Chloride Conc. and LOESS lines
geom_point(aes(y = ChlConc, fill = "Chloride Concentration"), #add a line plotting Chl Conc. over time
shape=23, color="black", stroke = .5, size=1.2) +
geom_smooth(aes(y = ChlConc, color = "LOESS Chloride Concentration"),
method = "loess", #apply LOWESS aka LOESS smoothing line
span = 0.07, #the span, or smoothness, visually/manually matched to the original report's figure
se = FALSE,
linewidth = 1,
alpha = 0.6) +
# Color and Legend formatting
scale_color_manual(values = Chl_colors)+
scale_fill_manual(values = "cornflowerblue") +
theme(
legend.title = element_blank(),
legend.justification = c("right", "top"),
legend.box.just = "left",
legend.margin = margin(6, 6, 6, 6),
legend.box.background = element_rect(color="black", linewidth=0.5)
)
}
3. Function to pull the Water Level record for a given well, during
a user-defined timespan.
# 5-YR RECORD
getWaterLevel_bySetTime_plot <- function(WL_date, WL_measurement,
insert_plot_title,
start_Date, end_Date){
# Compile the tibble in a workable format
df %>%
select(all_of(WL_date), all_of(WL_measurement)) %>% #pull only selected columns with date and water level
rename(Date = WL_date, WaterLevel = WL_measurement) %>%
na.omit() %>%
mutate(WaterLevel = as.numeric(WaterLevel)) %>%
mutate(Date = as.numeric(Date)) %>% #R is currently reading the columns as characters. Convert values to num.
filter(Date > 0 ) %>% #keep only the rows with valid dates
mutate(Date = as.Date(Date, origin = "1899-12-30")) %>% #convert dates to legible format
# Put the tibble into a graph
ggplot(aes(x = Date)) +
#Set x-axis units and axis/graph labels
scale_x_date(date_breaks = "1 year",
labels = date_format("%b-%Y"),
limits = as.Date(c(start_Date, end_Date)))+
labs(y = "Water Level (ft)",
x = "Date",
title = insert_plot_title) +
# Add Water Level and LOESS lines
geom_line(aes(y = WaterLevel, color = "Water Level"), #add a line plotting water level over time
linewidth = 0.5) +
geom_smooth(aes(y = WaterLevel, color = "LOESS Water Level"),
method = "loess", #apply LOWESS aka LOESS smoothing line
span = .35, #the span, or smoothness, visually/manually matched to the original report's figure
se = FALSE,
linewidth = 1,
linetype = "dashed",
alpha = 0.6) +
# Color and Legend formatting
scale_color_manual(values = waterlevel_colors)+
theme(
legend.title = element_blank(),
legend.justification = c("right", "top"),
legend.box.just = "left",
legend.margin = margin(6, 6, 6, 6),
legend.box.background = element_rect(color="black", linewidth=0.5)
)
}
4. Function to pull the Chloride Concentration record for a given
well, during a user-defined timespan.
getChl_bySetTime_plot <- function(Chl_date, Chl_measurement,
insert_plot_title,
start_Date, end_Date){
# Compile the tibble in a workable format
df %>%
select(all_of(Chl_date), all_of(Chl_measurement)) %>% #pull only selected columns with date and Chloride Conc.
rename(Date = Chl_date, ChlConc = Chl_measurement) %>%
na.omit() %>%
mutate(ChlConc = as.numeric(ChlConc)) %>%
mutate(Date = as.numeric(Date)) %>% #R is currently reading the columns as characters. Convert values to num.
filter(Date > 0 ) %>% #keep only the rows with valid dates
mutate(Date = as.Date(Date, origin = "1899-12-30")) %>% #convert dates to legible format
# Put the tibble into a graph
ggplot(aes(x = Date)) +
#Set x-axis units and axis/graph labels
scale_x_date(date_breaks = "1 year",
labels = date_format("%b-%Y"),
limits = as.Date(c(start_Date, end_Date)))+
labs(y = "Chloride Concentration (mg/L)",
x = "Date",
title = insert_plot_title) +
# Add Chloride Conc. and LOESS lines
geom_point(aes(y = ChlConc, fill = "Chloride Concentration"), #add a line plotting Chl Conc. over time
shape=23, color="black", stroke = .5, size=1.2) +
geom_smooth(aes(y = ChlConc, color = "LOESS Chloride Concentration"),
method = "loess", #apply LOWESS aka LOESS smoothing line
span = 0.13, #the span, or smoothness, visually/manually matched to the original report's figure
se = FALSE,
linewidth = 1,
alpha = 0.6) +
# Color and Legend formatting
scale_color_manual(values = Chl_colors)+
scale_fill_manual(values = "cornflowerblue") +
theme(
legend.title = element_blank(),
legend.justification = c("right", "top"),
legend.box.just = "left",
legend.margin = margin(6, 6, 6, 6),
legend.box.background = element_rect(color="black", linewidth=0.5)
)
}
These functions can of course be applied to the other Monitoring
Wells. For example:
Monitoring Well 20
MW20_waterlevel_FullRecord_plot <-
getWaterLevel_FullRecord_plot("WLs...46_Date", "WLs...48_MW20",
"Period of Record - Monitor Well 20") +
adjust_legendPosition(0.45, 0.95)
MW20_Chl_FullRecord_plot <-
getChl_FullRecord_plot("MW CHL_Date", "CHL...53_MW20",
"Period of Record - Monitor Well 20") +
adjust_legendPosition(0.35, 0.95)
MW20_waterlevel_5yr_plot <-
getWaterLevel_bySetTime_plot("WLs...46_Date", "WLs...48_MW20",
"Five-Year Period - Monitor Well 20",
'2018-01-01', '2023-01-01') +
adjust_legendPosition(0.35, 0.95)
MW20_Chl_5yr_plot <-
getChl_bySetTime_plot("MW CHL_Date", "CHL...53_MW20",
"Five-Year Period - Monitor Well 20",
'2018-01-01', '2023-01-01') +
adjust_legendPosition(0.35, 0.45)
MW20_waterlevel_FullRecord_plot
## `geom_smooth()` using formula = 'y ~ x'

MW20_Chl_FullRecord_plot
## `geom_smooth()` using formula = 'y ~ x'

MW20_waterlevel_5yr_plot
## `geom_smooth()` using formula = 'y ~ x'

MW20_Chl_5yr_plot
## `geom_smooth()` using formula = 'y ~ x'

Monitoring Well 21
MW21_waterlevel_FullRecord_plot <-
getWaterLevel_FullRecord_plot("WLs...46_Date", "WLs...49_MW21",
"Period of Record - Monitor Well 21") +
adjust_legendPosition(0.7, 0.99)
MW21_Chl_FullRecord_plot <-
getChl_FullRecord_plot("MW CHL_Date", "CHL...54_MW21",
"Period of Record - Monitor Well 21") +
adjust_legendPosition(0.5, 0.95)
MW21_waterlevel_5yr_plot <-
getWaterLevel_bySetTime_plot("WLs...46_Date", "WLs...49_MW21",
"Five-Year Period - Monitor Well 21",
'2018-01-01', '2023-01-01') +
adjust_legendPosition(0.4, 0.95)
MW21_Chl_5yr_plot <-
getChl_bySetTime_plot("MW CHL_Date", "CHL...54_MW21",
"Five-Year Period - Monitor Well 21",
'2018-01-01', '2023-01-01') +
adjust_legendPosition(0.95, 0.95)
MW21_waterlevel_FullRecord_plot
## `geom_smooth()` using formula = 'y ~ x'

MW21_Chl_FullRecord_plot
## `geom_smooth()` using formula = 'y ~ x'

MW21_waterlevel_5yr_plot
## `geom_smooth()` using formula = 'y ~ x'

MW21_Chl_5yr_plot
## `geom_smooth()` using formula = 'y ~ x'

Monitoring Well 25
MW25_waterlevel_FullRecord_plot <-
getWaterLevel_FullRecord_plot("WLs...46_Date", "WLs...50_MW25",
"Period of Record - Monitor Well 25") +
adjust_legendPosition(0.55, 0.95)
MW25_Chl_FullRecord_plot <- getChl_FullRecord_plot("MW CHL_Date", "CHL...55_MW25",
"Period of Record - Monitor Well 25") +
adjust_legendPosition(0.35, 0.95)
MW25_waterlevel_5yr_plot <- getWaterLevel_bySetTime_plot("WLs...46_Date", "WLs...50_MW25",
"Five-Year Period - Monitor Well 25",
'2018-01-01', '2023-01-01') +
adjust_legendPosition(0.35, 0.95)
MW25_Chl_5yr_plot <- getChl_bySetTime_plot("MW CHL_Date", "CHL...55_MW25",
"Five-Year Period - Monitor Well 25",
'2018-01-01', '2023-01-01') +
adjust_legendPosition(0.95, 0.35)
MW25_waterlevel_FullRecord_plot
## `geom_smooth()` using formula = 'y ~ x'

MW25_Chl_FullRecord_plot
## `geom_smooth()` using formula = 'y ~ x'

MW25_waterlevel_5yr_plot
## `geom_smooth()` using formula = 'y ~ x'

MW25_Chl_5yr_plot
## `geom_smooth()` using formula = 'y ~ x'

Calculating Kendall tau-b for the Period of Record
Compile a tibble for each Monitoring Well, containing just the Date
and the Monitoring Well’s Chloride readings.
K_tau_b_tibble_MW4 <- df %>%
select(all_of("MW CHL_Date"), all_of("CHL...52_MW4")) %>% #pull only certain columns
rename(NumDate = "MW CHL_Date") %>%
mutate(NumDate = as.numeric(NumDate)) %>% #R is currently reading the columns as characters. Convert values to num. This is the Excel numerical date format.
rename(Chl_MW4 = "CHL...52_MW4") %>%
mutate(Chl_MW4 = as.numeric(Chl_MW4)) %>%
filter(!is.na(Chl_MW4)) #Remove all NA values
K_tau_b_tibble_MW20 <- df %>%
select(all_of("MW CHL_Date"), all_of("CHL...53_MW20")) %>% #pull only certain columns
rename(NumDate = "MW CHL_Date") %>%
mutate(NumDate = as.numeric(NumDate)) %>% #R is currently reading the columns as characters. Convert values to num. This is the Excel numerical date format.
rename(Chl_MW20 = "CHL...53_MW20") %>%
mutate(Chl_MW20 = as.numeric(Chl_MW20)) %>%
filter(!is.na(Chl_MW20)) #Remove all NA values
K_tau_b_tibble_MW21 <- df %>%
select(all_of("MW CHL_Date"), all_of("CHL...54_MW21")) %>% #pull only certain columns
rename(NumDate = "MW CHL_Date") %>%
mutate(NumDate = as.numeric(NumDate)) %>% #R is currently reading the columns as characters. Convert values to num. This is the Excel numerical date format.
rename(Chl_MW21 = "CHL...54_MW21") %>%
mutate(Chl_MW21 = as.numeric(Chl_MW21)) %>%
filter(!is.na(Chl_MW21)) #Remove all NA values
K_tau_b_tibble_MW25 <- df %>%
select(all_of("MW CHL_Date"), all_of("CHL...55_MW25")) %>% #pull only certain columns
rename(NumDate = "MW CHL_Date") %>%
mutate(NumDate = as.numeric(NumDate)) %>% #R is currently reading the columns as characters. Convert values to num. This is the Excel numerical date format.
rename(Chl_MW25 = "CHL...55_MW25") %>%
mutate(Chl_MW25 = as.numeric(Chl_MW25)) %>%
filter(!is.na(Chl_MW25)) #Remove all NA values
# Also make a tibble for Date alone
K_tau_b_tibble_Date <- df %>%
select(all_of("MW CHL_Date")) %>% #pull only certain columns
rename(NumDate = "MW CHL_Date") %>%
mutate(NumDate = as.numeric(NumDate)) %>% #R is currently reading the columns as characters. Convert values to num. This is the Excel numerical date format.
filter(!is.na(NumDate)) #Remove all NA values
Repeat for 2018-2022
#Filter all tibbles for the 2018-2022 Date range
K_tau_b_tibble_Date_2018to22 <- df %>%
select(all_of("MW CHL_Date")) %>% #pull only certain columns
rename(NumDate = "MW CHL_Date") %>%
mutate(NumDate = as.numeric(NumDate)) %>% #R is currently reading the columns as characters. Convert values to num. This is the Excel numerical date format.
mutate(ProperDate = as.Date(NumDate, origin = "1899-12-30")) %>% #Make a new column with the "ProperDate".
filter(ProperDate > "2018-01-01" & ProperDate < "2022-12-31") %>% #Filter for readings from 2018 to 2022
filter(!is.na(NumDate)) #Remove all NA values
K_tau_b_tibble_MW4_2018to22 <- df %>%
select(all_of("MW CHL_Date"), all_of("CHL...52_MW4")) %>% #pull only certain columns
rename(NumDate = "MW CHL_Date") %>%
mutate(NumDate = as.numeric(NumDate)) %>% #R is currently reading the columns as characters. Convert values to num. This is the Excel numerical date format.
mutate(ProperDate = as.Date(NumDate, origin = "1899-12-30")) %>% #Make a new column with the "ProperDate".
filter(ProperDate > "2018-01-01" & ProperDate < "2022-12-31") %>% #Filter for readings from 2018 to 2022
rename(Chl_MW4 = "CHL...52_MW4") %>%
mutate(Chl_MW4 = as.numeric(Chl_MW4)) %>%
filter(!is.na(Chl_MW4)) #Remove all NA values
K_tau_b_tibble_MW20_2018to22 <- df %>%
select(all_of("MW CHL_Date"), all_of("CHL...53_MW20")) %>% #pull only certain columns
rename(NumDate = "MW CHL_Date") %>%
mutate(NumDate = as.numeric(NumDate)) %>% #R is currently reading the columns as characters. Convert values to num. This is the Excel numerical date format.
mutate(ProperDate = as.Date(NumDate, origin = "1899-12-30")) %>% #Make a new column with the "ProperDate".
filter(ProperDate > "2018-01-01" & ProperDate < "2022-12-31") %>% #Filter for readings from 2018 to 2022
rename(Chl_MW20 = "CHL...53_MW20") %>%
mutate(Chl_MW20 = as.numeric(Chl_MW20)) %>%
filter(!is.na(Chl_MW20)) #Remove all NA values
K_tau_b_tibble_MW21_2018to22 <- df %>%
select(all_of("MW CHL_Date"), all_of("CHL...54_MW21")) %>% #pull only certain columns
rename(NumDate = "MW CHL_Date") %>%
mutate(NumDate = as.numeric(NumDate)) %>% #R is currently reading the columns as characters. Convert values to num. This is the Excel numerical date format.
mutate(ProperDate = as.Date(NumDate, origin = "1899-12-30")) %>% #Make a new column with the "ProperDate".
filter(ProperDate > "2018-01-01" & ProperDate < "2022-12-31") %>% #Filter for readings from 2018 to 2022
rename(Chl_MW21 = "CHL...54_MW21") %>%
mutate(Chl_MW21 = as.numeric(Chl_MW21)) %>%
filter(!is.na(Chl_MW21)) #Remove all NA values
K_tau_b_tibble_MW25_2018to22 <- df %>%
select(all_of("MW CHL_Date"), all_of("CHL...55_MW25")) %>% #pull only certain columns
rename(NumDate = "MW CHL_Date") %>%
mutate(NumDate = as.numeric(NumDate)) %>% #R is currently reading the columns as characters. Convert values to num. This is the Excel numerical date format.
mutate(ProperDate = as.Date(NumDate, origin = "1899-12-30")) %>% #Make a new column with the "ProperDate".
filter(ProperDate > "2018-01-01" & ProperDate < "2022-12-31") %>% #Filter for readings from 2018 to 2022
rename(Chl_MW25 = "CHL...55_MW25") %>%
mutate(Chl_MW25 = as.numeric(Chl_MW25)) %>%
filter(!is.na(Chl_MW25)) #Remove all NA values
# Calculate Kendall tau-b
Ktau_b_Date_2018to22 <- cor.test(K_tau_b_tibble_Date_2018to22$NumDate, K_tau_b_tibble_Date_2018to22$NumDate, method = "kendall", exact = FALSE)
CorrCoeff_Date_2018to22 <- format(round(as.numeric(Ktau_b_Date_2018to22$estimate), 3), nsmall = 3)
Sig_Date_2018to22 <- formatC(Ktau_b_Date_2018to22$p.value, format = "f", digits = 3)
N_Date_2018to22 <- as.character(nrow(K_tau_b_tibble_Date_2018to22))
Ktau_b_MW4_2018to22 <- cor.test(K_tau_b_tibble_MW4_2018to22$Chl_MW4, K_tau_b_tibble_MW4_2018to22$NumDate, method = "kendall", exact = FALSE)
CorrCoeff_MW4_2018to22 <- format(round(as.numeric(Ktau_b_MW4_2018to22$estimate), 3), nsmall = 3)
Sig_MW4_2018to22 <- formatC(Ktau_b_MW4_2018to22$p.value, format = "f", digits = 3)
N_MW4_2018to22 <- as.character(nrow(K_tau_b_tibble_MW4_2018to22))
Ktau_b_MW20_2018to22 <- cor.test(K_tau_b_tibble_MW20_2018to22$Chl_MW20, K_tau_b_tibble_MW20_2018to22$NumDate, method = "kendall", exact = FALSE)
CorrCoeff_MW20_2018to22 <- format(round(as.numeric(Ktau_b_MW20_2018to22$estimate), 3), nsmall = 3)
Sig_MW20_2018to22 <- formatC(Ktau_b_MW20_2018to22$p.value, format = "f", digits = 3)
N_MW20_2018to22 <- as.character(nrow(K_tau_b_tibble_MW20_2018to22))
Ktau_b_MW21_2018to22 <- cor.test(K_tau_b_tibble_MW21_2018to22$Chl_MW21, K_tau_b_tibble_MW21_2018to22$NumDate, method = "kendall", exact = FALSE)
CorrCoeff_MW21_2018to22 <- format(round(as.numeric(Ktau_b_MW21_2018to22$estimate), 3), nsmall = 3)
Sig_MW21_2018to22 <- formatC(Ktau_b_MW21_2018to22$p.value, format = "f", digits = 3)
N_MW21_2018to22 <- as.character(nrow(K_tau_b_tibble_MW21_2018to22))
Ktau_b_MW25_2018to22 <- cor.test(K_tau_b_tibble_MW25_2018to22$Chl_MW25, K_tau_b_tibble_MW25_2018to22$NumDate, method = "kendall", exact = FALSE)
CorrCoeff_MW25_2018to22 <- format(round(as.numeric(Ktau_b_MW25_2018to22$estimate), 3), nsmall = 3)
Sig_MW25_2018to22 <- formatC(Ktau_b_MW25_2018to22$p.value, format = "f", digits = 3)
N_MW25_2018to22 <- as.character(nrow(K_tau_b_tibble_MW25_2018to22))
#Compile the results
Correlation_Coeff_all_2018to22 <- c(CorrCoeff_Date_2018to22, CorrCoeff_MW4_2018to22, CorrCoeff_MW20_2018to22, CorrCoeff_MW21_2018to22, CorrCoeff_MW25_2018to22)
Sig_all_2018to22 <- c(Sig_Date_2018to22, Sig_MW4_2018to22, Sig_MW20_2018to22, Sig_MW21_2018to22, Sig_MW25_2018to22)
N_all_2018to22 <- c(N_Date_2018to22, N_MW4_2018to22, N_MW20_2018to22, N_MW21_2018to22, N_MW25_2018to22)
Final_Ktau_b_2018to22 <- rbind(Correlation_Coeff_all_2018to22, Sig_all_2018to22, N_all_2018to22)
rownames(Final_Ktau_b_2018to22) <- c("Correlation Coffecient", "Sig.", "N") #change the row names to match
colnames(Final_Ktau_b_2018to22) <- c("Date", "MW4", "MW20", "MW21", "MW25") #change the column names to match
print(Final_Ktau_b_2018to22)
## Date MW4 MW20 MW21 MW25
## Correlation Coffecient "1.000" "0.258" "0.497" "0.352" "-0.379"
## Sig. "0.000" "0.004" "0.000" "0.000" "0.000"
## N "62" "59" "60" "60" "60"