To evaluate how biogeochemical water quality variables interact and vary across wet and dry seasons in two urban canals (Coral Gables and Little River) using principal component analysis (PCA).
Notes
DBHYDRO references
# Install notes (optional):
# install.packages("astsa")
# install.packages("segmented")
library(astsa)
pacman::p_load(tidyverse, cowplot, lubridate, dplyr, dplR, zoo)
library(naniar)
library(DescTools) # PlotACF function
library(WaveletComp)
library(matrixStats)
library(tidyr)
library(ggplot2)
library(readr)
library(gridExtra)
library(ggpubr)
library(viridis)
library(car)
library(devtools)
library(janitor)
library(neonOS)
library(terra)
library(imputeTS)
library(corrplot)
library(segmented)
library(rcompanion)
library(FactoMineR)
library(factoextra)
library(FSA)
library(rcompanion)
# If you did not start RStudio from the project file, set your working directory:
setwd("/Users/lizortiz/Library/CloudStorage/OneDrive-FloridaInternationalUniversity/SFWMD_discharge_precipitation")
# merge buoy_data and discharge (only variables that I want)
new_data <- read.csv("new_data.csv")
head(new_data)
tail(new_data)
# merge new_data (buoy_data and discharge) with precipitation
new_data_prep <- read.csv("new_data_prep.csv")
head(new_data_prep)
tail(new_data_prep)
# merge all buoy variables with discharge and precipitation
new_data_all_final <- read.csv("new_data_all_final.csv")
head(new_data_all_final)
tail(new_data_all_final)
# Add wet/dry seasons + split date
new_data_all_final_2 <- new_data_all_final %>%
separate(Date, into = c("Year", "Month", "Day"), sep = "-", remove = FALSE) %>%
mutate(
Year = as.character(Year),
Month = as.character(Month),
Day = as.character(Day)
)
new_data_all_final_2$Seasons <- ifelse(
grepl("5|6|7|8|9|10", new_data_all_final_2$Month, ignore.case = TRUE),
"Wet",
"Dry"
)
# Replace original
new_data_all_final <- new_data_all_final_2
# Convert discharge to Q (m3/s)
new_data_all_final$Q <- new_data_all_final$Discharge * 0.028316846592
# Ensure Date class
new_data_all_final$Date <- as.Date(new_data_all_final$Date)
# Remove rows between 2021-11-11 and 2021-11-23 (bad temp/cond values)
new_data_all_final_filtered <- new_data_all_final %>%
filter(!(Date >= as.Date("2021-11-11") & Date <= as.Date("2021-11-23")))
# Remove pH outliers
new_data_all_final_filtered_2 <- new_data_all_final_filtered %>%
mutate(daily_EXO_pH = ifelse(daily_EXO_pH < 5 | daily_EXO_pH > 10, NA, daily_EXO_pH))
# Final working dataset
new_data_2 <- new_data_all_final_filtered_2
# Save
write.csv(new_data_2, "new_data_2_20260216.csv", row.names = FALSE)
# Subset canals
new_data_2_coral <- subset(new_data_all_final_filtered_2, Canal == "Coral_Gables")
new_data_2_river <- subset(new_data_all_final_filtered_2, Canal == "Little_River")
dat2 <- subset(new_data_all_final_filtered_2, Canal == "Little_River")
dat <- subset(new_data_all_final_filtered_2, Canal == "Coral_Gables")
bio_variables_little <- dat2[, c(
"daily_EXO_fdom",
"daily_EXO_spCond",
"daily_EXO_do_mgL",
"daily_EXO_sal_psu",
"daily_EXO_pH",
"daily_EXO_chla_ugL",
"Q",
"daily_EXO_Turbidity_NTU",
"Year",
"Seasons",
"Month"
)]
bio_variables_little$Month <- as.character(bio_variables_little$Month)
bio_variables_little <- na.omit(bio_variables_little)
bio_variables_little <- bio_variables_little %>%
dplyr::rename(
fDOM = daily_EXO_fdom,
Conductivity = daily_EXO_spCond,
DO = daily_EXO_do_mgL,
Salinity = daily_EXO_sal_psu,
pH = daily_EXO_pH,
Chlorophyll_a = daily_EXO_chla_ugL,
Discharge = Q
)
iris.pca_little <- PCA(bio_variables_little[, c(1:8)], scale = TRUE, graph = FALSE)
fviz_pca_biplot(
iris.pca_little,
col.ind = bio_variables_little$Seasons,
palette = "jco",
label = "var",
col.var = "black",
repel = TRUE,
addEllipses = TRUE,
legend.title = "Groups"
)
scores_little <- as.data.frame(iris.pca_little$ind$coord)
bio_variables_little$PC1 <- scores_little$Dim.1
bio_variables_little$PC2 <- scores_little$Dim.2
pca1_box_little <- ggplot(bio_variables_little, aes(y = PC1, x = Seasons, color = Seasons)) +
geom_violin() +
geom_boxplot(width = 0.1) +
geom_jitter(shape = 16, position = position_jitter(0.2)) +
labs(x = "", y = "PC1", color = "Seasons") +
theme_minimal()
pca2_box_little <- ggplot(bio_variables_little, aes(y = PC2, x = Seasons, color = Seasons)) +
geom_violin() +
geom_boxplot(width = 0.1) +
geom_jitter(shape = 16, position = position_jitter(0.2)) +
labs(x = "", y = "PC2", color = "Seasons") +
theme_minimal()
pca_box_little <- ggarrange(
pca1_box_little, pca2_box_little,
ncol = 2,
common.legend = TRUE
)
pca_box_little
kruskal.test(PC1 ~ Seasons, data = bio_variables_little)
##
## Kruskal-Wallis rank sum test
##
## data: PC1 by Seasons
## Kruskal-Wallis chi-squared = 116.08, df = 1, p-value < 2.2e-16
dunn_results <- dunnTest(PC1 ~ Seasons, data = bio_variables_little, method = "bonferroni")
p_table <- dunn_results$res
cld <- cldList(P.adj ~ Comparison, data = p_table, threshold = 0.05)
print(cld)
## Group Letter MonoLetter
## 1 Dry a a
## 2 Wet b b
kruskal.test(PC2 ~ Seasons, data = bio_variables_little)
##
## Kruskal-Wallis rank sum test
##
## data: PC2 by Seasons
## Kruskal-Wallis chi-squared = 119.76, df = 1, p-value < 2.2e-16
dunn_results <- dunnTest(PC2 ~ Seasons, data = bio_variables_little, method = "bonferroni")
p_table <- dunn_results$res
cld <- cldList(P.adj ~ Comparison, data = p_table, threshold = 0.05)
print(cld)
## Group Letter MonoLetter
## 1 Dry a a
## 2 Wet b b
bio_variables <- dat[, c(
"daily_EXO_fdom",
"daily_EXO_spCond",
"daily_EXO_do_mgL",
"daily_EXO_sal_psu",
"daily_EXO_pH",
"daily_EXO_chla_ugL",
"Q",
"daily_EXO_Turbidity_NTU",
"Year",
"Seasons",
"Month"
)]
bio_variables$Month <- as.character(bio_variables$Month)
bio_variables <- na.omit(bio_variables)
bio_variables <- bio_variables %>%
dplyr::rename(
fDOM = daily_EXO_fdom,
Conductivity = daily_EXO_spCond,
DO = daily_EXO_do_mgL,
Salinity = daily_EXO_sal_psu,
pH = daily_EXO_pH,
Chlorophyll_a = daily_EXO_chla_ugL,
Discharge = Q
)
iris.pca <- PCA(bio_variables[, c(1:8)], scale = TRUE, graph = FALSE)
fviz_pca_biplot(
iris.pca,
col.ind = bio_variables$Seasons,
palette = "jco",
label = "var",
col.var = "black",
repel = TRUE,
addEllipses = TRUE,
legend.title = "Groups"
)
scores <- as.data.frame(iris.pca$ind$coord)
bio_variables$PC1 <- scores$Dim.1
bio_variables$PC2 <- scores$Dim.2
pca1_box_coral <- ggplot(bio_variables, aes(y = PC1, x = Seasons, color = Seasons)) +
geom_violin() +
geom_boxplot(width = 0.1) +
geom_jitter(shape = 16, position = position_jitter(0.2)) +
labs(x = "", y = "PC1", color = "Seasons") +
theme_minimal()
pca2_box_coral <- ggplot(bio_variables, aes(y = PC2, x = Seasons, color = Seasons)) +
geom_violin() +
geom_boxplot(width = 0.1) +
geom_jitter(shape = 16, position = position_jitter(0.2)) +
labs(x = "", y = "PC2", color = "Seasons") +
theme_minimal()
pca_box_coral <- ggarrange(
pca1_box_coral, pca2_box_coral,
ncol = 2,
common.legend = TRUE
)
pca_box_coral
kruskal.test(PC1 ~ Seasons, data = bio_variables)
##
## Kruskal-Wallis rank sum test
##
## data: PC1 by Seasons
## Kruskal-Wallis chi-squared = 41.098, df = 1, p-value = 1.448e-10
dunn_results <- dunnTest(PC1 ~ Seasons, data = bio_variables, method = "bonferroni")
p_table <- dunn_results$res
cld <- cldList(P.adj ~ Comparison, data = p_table, threshold = 0.05)
print(cld)
## Group Letter MonoLetter
## 1 Dry a a
## 2 Wet b b
kruskal.test(PC2 ~ Seasons, data = bio_variables)
##
## Kruskal-Wallis rank sum test
##
## data: PC2 by Seasons
## Kruskal-Wallis chi-squared = 104.74, df = 1, p-value < 2.2e-16
dunn_results <- dunnTest(PC2 ~ Seasons, data = bio_variables, method = "bonferroni")
p_table <- dunn_results$res
cld <- cldList(P.adj ~ Comparison, data = p_table, threshold = 0.05)
print(cld)
## Group Letter MonoLetter
## 1 Dry a a
## 2 Wet b b