Analysis of maximum annual flood events in the Monongahela River watershed.
library(tidyverse)
library(ncdf4)
library(CFtime)
library(ggpmisc) # to display trendline equation
library(scales) # plot formatting
library(ggridges) # ridge plots
# Load stream gage information
stream_gages <- read_csv("huc4_0502_stream_gages_20yr_plus.csv") |>
mutate(
site_no = as.factor(site_no),
station_nm = as.factor(station_nm),
count_nu = as.integer(count_nu)
) |>
select(site_no, station_nm, dec_lat_va, dec_long_va, drain_area_va)
head(stream_gages)
## # A tibble: 6 × 5
## site_no station_nm dec_lat_va dec_long_va drain_area_va
## <fct> <fct> <dbl> <dbl> <dbl>
## 1 03050000 TYGART VALLEY RIVER NEAR DAILEY… 38.8 -79.9 188
## 2 03050500 TYGART VALLEY RIVER NEAR ELKINS… 38.9 -79.9 273
## 3 03051000 TYGART VALLEY RIVER AT BELINGTO… 39.0 -79.9 415
## 4 03051500 MIDDLE FORK RIVER AT MIDVALE, WV 38.9 -80.1 123
## 5 03052000 MIDDLE FORK RIVER AT AUDRA, WV 39.0 -80.1 149
## 6 03052500 SAND RUN NEAR BUCKHANNON, WV 39.0 -80.2 14.3
# Load maximum annual floods
max_floods <- read_csv("max_annual_floods.csv") |>
mutate(
site_no = as.factor(site_no),
station_name = as.factor(station_name),
year = as.integer(year),
max_flood_discharge = as.integer(max_flood_discharge),
max_flood_month = as.integer(max_flood_month),
max_flood_day = as.integer(max_flood_day),
max_flood_date = as.Date(max_flood_date),
season = factor(season, levels = c("Winter", "Spring", "Summer", "Autumn"))
)
head(max_floods)
## # A tibble: 6 × 8
## site_no station_name year max_flood_date max_flood_discharge max_flood_month
## <fct> <fct> <int> <date> <int> <int>
## 1 03050000 TYGART VALL… 1948 1948-02-14 7640 2
## 2 03050000 TYGART VALL… 1950 1950-06-25 6240 6
## 3 03050000 TYGART VALL… 1952 1952-01-28 5100 1
## 4 03050000 TYGART VALL… 1953 1953-02-22 4320 2
## 5 03050000 TYGART VALL… 1954 1954-10-16 10400 10
## 6 03050000 TYGART VALL… 1956 1956-05-28 7250 5
## # ℹ 2 more variables: max_flood_day <int>, season <fct>
# Join datasets to get coordinates for each flood event
floods_with_coords <- max_floods |>
left_join(stream_gages, by = "site_no") |>
filter(!is.na(dec_lat_va), !is.na(dec_long_va)) |>
arrange(year, site_no)
cat("Total flood events to process:", nrow(floods_with_coords), "\n")
## Total flood events to process: 2416
cat("Year range:", min(floods_with_coords$year), "to", max(floods_with_coords$year), "\n")
## Year range: 1948 to 2024
head(floods_with_coords)
## # A tibble: 6 × 12
## site_no station_name year max_flood_date max_flood_discharge max_flood_month
## <fct> <fct> <int> <date> <int> <int>
## 1 03050000 TYGART VALL… 1948 1948-02-14 7640 2
## 2 03050500 TYGART VALL… 1948 1948-02-14 8810 2
## 3 03051000 TYGART VALL… 1948 1948-02-14 13200 2
## 4 03052000 MIDDLE FORK… 1948 1948-02-14 8470 2
## 5 03052500 SAND RUN NE… 1948 1948-03-24 1320 3
## 6 03053500 BUCKHANNON … 1948 1948-02-14 10300 2
## # ℹ 6 more variables: max_flood_day <int>, season <fct>, station_nm <fct>,
## # dec_lat_va <dbl>, dec_long_va <dbl>, drain_area_va <dbl>
# Directory containing NetCDF files
data_directory <- "noaa_precip_data"
#' Extract precipitation data for flood events in a single year
#'
#' This function processes all flood events for a given year by opening the
#' corresponding NetCDF precipitation file once and extracting both single-day
#' and 3-day precipitation totals for each flood event. The 3-day total includes
#' precipitation from the flood day and the two preceding days.
#'
#' @param year_data A data frame containing flood events for a single year.
#' Must include columns: `max_flood_date`, `dec_lat_va`, `dec_long_va`
#' @param year Integer. The year being processed (used for NetCDF filename construction)
#' @param data_dir Character. Directory path containing NetCDF files.
#' Default is "noaa_precip_data"
#'
#' @return A data frame identical to `year_data` but with two additional columns:
#' \describe{
#' \item{precipitation_mm}{Numeric. Single-day precipitation on flood date}
#' \item{precipitation3_mm}{Numeric. 3-day precipitation total (flood day + 2 prior days)}
#' }
#' Both columns will contain NA values for failed extractions or invalid dates.
#'
#' @details
#' The function expects NetCDF files following the naming convention:
#' `precip.V1.0.YYYY.nc` where YYYY is the year.
#'
#' **Coordinate Handling:**
#' - Longitude coordinates are converted from 0-360° to -180-180° format
#' - Nearest grid cell matching is used for spatial extraction
#'
#' **Temporal Handling:**
#' - Uses CFtime package for proper time coordinate interpretation
#' - 3-day precipitation window: [flood_date - 2 days, flood_date]
#'
#' **Limitations:**
#' - Flood events on January 1-2 receive NA for `precipitation3_mm` to avoid
#' cross-year file complications
#' - Missing dates in NetCDF files result in NA values
#' - File I/O errors are caught and logged as warnings
#'
#' @examples
#' \dontrun{
#' # Example year_data structure:
#' floods_2020 <- data.frame(
#' max_flood_date = as.Date(c("2020-03-15", "2020-07-22")),
#' dec_lat_va = c(38.8, 39.1),
#' dec_long_va = c(-79.9, -80.1)
#' )
#'
#' # Extract precipitation data
#' result <- extract_year_precipitation(floods_2020, 2020, "path/to/netcdf/files")
#' }
#'
extract_year_precipitation <- function(year_data, year, data_dir = data_directory) {
cat("Processing year", year, "with", nrow(year_data), "flood events...\n")
# Construct NetCDF filename based on your naming convention
nc_file <- file.path(data_dir, paste0("precip.V1.0.", year, ".nc"))
# Check if file exists
if (!file.exists(nc_file)) {
warning("NetCDF file not found for year ", year, ": ", nc_file)
return(year_data |>
mutate(precipitation_mm = NA_real_,
precipitation3_mm = NA_real_))
}
tryCatch({
# Open NetCDF file
nc <- nc_open(nc_file)
# Get coordinates and convert longitude
lon_raw <- ncvar_get(nc, "lon")
lon_grid <- lon_raw - 360 # Convert to western longitude
lat_grid <- ncvar_get(nc, "lat")
# Handle time using CFtime
time_dim <- nc$dim$time
cf_time <- CFtime(time_dim$units, time_dim$calendar, time_dim$vals)
timestamps <- as_timestamp(cf_time)
file_dates <- as.Date(timestamps)
# Get precipitation variable name (first variable)
precip_var_name <- names(nc$var)[1]
# Extract precipitation for each flood event
precip_results <- map_dfr(1:nrow(year_data), function(i) {
flood_date <- year_data$max_flood_date[i]
# Skip events on Jan 1-2 to avoid cross-year complications
if (month(flood_date) == 1 && day(flood_date) <= 2) {
return(tibble(precipitation_mm = NA_real_, precipitation3_mm = NA_real_))
}
# Find nearest grid cell for coordinates
lon_idx <- which.min(abs(lon_grid - year_data$dec_long_va[i]))
lat_idx <- which.min(abs(lat_grid - year_data$dec_lat_va[i]))
# Define 3-day window (flood day and 2 days prior)
date_window <- seq(from = flood_date - days(2), to = flood_date, by = "day")
# Find time indices for all 3 days
time_indices <- map_int(date_window, ~ {
idx <- which(file_dates == .x)
if (length(idx) == 0) return(NA_integer_) else return(idx[1])
})
# Extract precipitation for single day (flood day)
flood_day_idx <- time_indices[3] # Last day in window
if (is.na(flood_day_idx)) {
single_day_precip <- NA_real_
} else {
single_day_precip <- ncvar_get(nc, precip_var_name,
start = c(lon_idx, lat_idx, flood_day_idx),
count = c(1, 1, 1))
}
# Extract precipitation for 3-day window
if (any(is.na(time_indices))) {
three_day_precip <- NA_real_
} else {
# Extract all 3 days at once
precip_values <- map_dbl(time_indices, ~ {
ncvar_get(nc, precip_var_name,
start = c(lon_idx, lat_idx, .x),
count = c(1, 1, 1))
})
three_day_precip <- sum(precip_values, na.rm = TRUE)
}
return(tibble(
precipitation_mm = as.numeric(single_day_precip),
precipitation3_mm = as.numeric(three_day_precip)
))
})
# Close NetCDF file
nc_close(nc)
# Add precipitation values to the data
year_data |> bind_cols(precip_results)
}, error = function(e) {
warning("Error processing year ", year, ": ", e$message)
year_data |>
mutate(precipitation_mm = NA_real_,
precipitation3_mm = NA_real_)
})
}
# Process each year separately
floods_with_precip <- floods_with_coords |>
group_by(year) |>
group_modify(~ extract_year_precipitation(.x, .y$year)) |>
ungroup()
## Processing year 1948 with 37 flood events...
## Processing year 1949 with 13 flood events...
## Processing year 1950 with 38 flood events...
## Processing year 1951 with 31 flood events...
## Processing year 1952 with 37 flood events...
## Processing year 1953 with 38 flood events...
## Processing year 1954 with 38 flood events...
## Processing year 1955 with 2 flood events...
## Processing year 1956 with 38 flood events...
## Processing year 1957 with 27 flood events...
## Processing year 1958 with 34 flood events...
## Processing year 1959 with 38 flood events...
## Processing year 1960 with 34 flood events...
## Processing year 1961 with 38 flood events...
## Processing year 1962 with 36 flood events...
## Processing year 1963 with 38 flood events...
## Processing year 1964 with 38 flood events...
## Processing year 1965 with 35 flood events...
## Processing year 1966 with 40 flood events...
## Processing year 1967 with 40 flood events...
## Processing year 1968 with 41 flood events...
## Processing year 1969 with 41 flood events...
## Processing year 1970 with 36 flood events...
## Processing year 1971 with 23 flood events...
## Processing year 1972 with 40 flood events...
## Processing year 1973 with 5 flood events...
## Processing year 1974 with 38 flood events...
## Processing year 1975 with 36 flood events...
## Processing year 1976 with 40 flood events...
## Processing year 1977 with 18 flood events...
## Processing year 1978 with 39 flood events...
## Processing year 1979 with 31 flood events...
## Processing year 1980 with 31 flood events...
## Processing year 1981 with 37 flood events...
## Processing year 1982 with 33 flood events...
## Processing year 1983 with 33 flood events...
## Processing year 1984 with 34 flood events...
## Processing year 1985 with 38 flood events...
## Processing year 1986 with 16 flood events...
## Processing year 1987 with 22 flood events...
## Processing year 1988 with 32 flood events...
## Processing year 1989 with 38 flood events...
## Processing year 1990 with 38 flood events...
## Processing year 1991 with 21 flood events...
## Processing year 1992 with 23 flood events...
## Processing year 1993 with 33 flood events...
## Processing year 1994 with 38 flood events...
## Processing year 1995 with 32 flood events...
## Processing year 1996 with 37 flood events...
## Processing year 1997 with 26 flood events...
## Processing year 1998 with 35 flood events...
## Processing year 1999 with 34 flood events...
## Processing year 2000 with 35 flood events...
## Processing year 2001 with 35 flood events...
## Processing year 2002 with 34 flood events...
## Processing year 2003 with 33 flood events...
## Processing year 2004 with 19 flood events...
## Processing year 2005 with 31 flood events...
## Processing year 2006 with 11 flood events...
## Processing year 2007 with 30 flood events...
## Processing year 2008 with 21 flood events...
## Processing year 2009 with 17 flood events...
## Processing year 2010 with 33 flood events...
## Processing year 2011 with 34 flood events...
## Processing year 2012 with 20 flood events...
## Processing year 2013 with 33 flood events...
## Processing year 2014 with 23 flood events...
## Processing year 2015 with 36 flood events...
## Processing year 2016 with 35 flood events...
## Processing year 2017 with 33 flood events...
## Processing year 2018 with 36 flood events...
## Processing year 2019 with 34 flood events...
## Processing year 2020 with 18 flood events...
## Processing year 2021 with 35 flood events...
## Processing year 2022 with 36 flood events...
## Processing year 2023 with 26 flood events...
## Processing year 2024 with 29 flood events...
head(floods_with_precip)
## # A tibble: 6 × 14
## year site_no station_name max_flood_date max_flood_discharge max_flood_month
## <int> <fct> <fct> <date> <int> <int>
## 1 1948 03050000 TYGART VALL… 1948-02-14 7640 2
## 2 1948 03050500 TYGART VALL… 1948-02-14 8810 2
## 3 1948 03051000 TYGART VALL… 1948-02-14 13200 2
## 4 1948 03052000 MIDDLE FORK… 1948-02-14 8470 2
## 5 1948 03052500 SAND RUN NE… 1948-03-24 1320 3
## 6 1948 03053500 BUCKHANNON … 1948-02-14 10300 2
## # ℹ 8 more variables: max_flood_day <int>, season <fct>, station_nm <fct>,
## # dec_lat_va <dbl>, dec_long_va <dbl>, drain_area_va <dbl>,
## # precipitation_mm <dbl>, precipitation3_mm <dbl>
# Check extraction results
extraction_summary <- floods_with_precip |>
summarise(
total_events = n(),
successful_1day = sum(!is.na(precipitation_mm)),
successful_3day = sum(!is.na(precipitation3_mm)),
failed_1day = sum(is.na(precipitation_mm)),
failed_3day = sum(is.na(precipitation3_mm)),
success_rate_1day = successful_1day / total_events * 100,
success_rate_3day = successful_3day / total_events * 100,
mean_1day_precip = mean(precipitation_mm, na.rm = TRUE),
mean_3day_precip = mean(precipitation3_mm, na.rm = TRUE),
median_1day_precip = median(precipitation_mm, na.rm = TRUE),
median_3day_precip = median(precipitation3_mm, na.rm = TRUE),
.groups = "drop"
)
print(extraction_summary)
## # A tibble: 1 × 11
## total_events successful_1day successful_3day failed_1day failed_3day
## <int> <int> <int> <int> <int>
## 1 2416 2383 2383 33 33
## # ℹ 6 more variables: success_rate_1day <dbl>, success_rate_3day <dbl>,
## # mean_1day_precip <dbl>, mean_3day_precip <dbl>, median_1day_precip <dbl>,
## # median_3day_precip <dbl>
# Summary by year
year_summary <- floods_with_precip |>
group_by(year) |>
summarise(
events = n(),
successful_1day = sum(!is.na(precipitation_mm)),
successful_3day = sum(!is.na(precipitation3_mm)),
mean_1day_precip = mean(precipitation_mm, na.rm = TRUE),
mean_3day_precip = mean(precipitation3_mm, na.rm = TRUE),
.groups = "drop"
) |>
arrange(year)
print(year_summary)
## # A tibble: 77 × 6
## year events successful_1day successful_3day mean_1day_precip
## <int> <int> <int> <int> <dbl>
## 1 1948 37 37 37 19.7
## 2 1949 13 13 13 20.6
## 3 1950 38 38 38 34.4
## 4 1951 31 31 31 28.8
## 5 1952 37 37 37 23.4
## 6 1953 38 38 38 16.9
## 7 1954 38 38 38 50.1
## 8 1955 2 2 2 44.5
## 9 1956 38 38 38 46.6
## 10 1957 27 27 27 32.3
## # ℹ 67 more rows
## # ℹ 1 more variable: mean_3day_precip <dbl>
# Check how many Jan 1-2 events were skipped
jan12_events <- floods_with_precip |>
filter(month(max_flood_date) == 1, day(max_flood_date) <= 2) |>
nrow()
cat("Events on Jan 1-2 (skipped for 3-day calculation):", jan12_events, "\n")
## Events on Jan 1-2 (skipped for 3-day calculation): 33
# Prepare final dataset for export
final_dataset <- floods_with_precip |>
select(
site_no, station_name, year, max_flood_date,
max_flood_discharge, max_flood_month, max_flood_day,
precipitation_mm, precipitation3_mm
)
# Write to CSV
output_file <- "max_annual_floods_with_precipitation.csv"
write_csv(final_dataset, output_file)
# Get unique sites from the dataframe
unique_sites <- floods_with_precip |>
select(site_no, station_name) |>
distinct() |>
arrange(site_no)
cat("Results written to:", output_file, "\n")
## Results written to: max_annual_floods_with_precipitation.csv
cat("Final dataset contains", nrow(final_dataset), "flood events for", nrow(unique_sites), "sites\n")
## Final dataset contains 2416 flood events for 47 sites
# Basic plots to verify the data looks reasonable
# Distribution of 1-day precipitation
p1 <- floods_with_precip |>
filter(!is.na(precipitation_mm)) |>
ggplot(aes(x = precipitation_mm)) +
geom_histogram(bins = 50, alpha = 0.7, fill = "steelblue") +
labs(
title = "1-Day Precipitation on Flood Days",
x = "Precipitation (mm)",
y = "Count"
) +
theme_minimal()
# Distribution of 3-day precipitation
p2 <- floods_with_precip |>
filter(!is.na(precipitation3_mm)) |>
ggplot(aes(x = precipitation3_mm)) +
geom_histogram(bins = 50, alpha = 0.7, fill = "darkgreen") +
labs(
title = "3-Day Precipitation Totals",
x = "3-Day Precipitation (mm)",
y = "Count"
) +
theme_minimal()
print(p1)
print(p2)
# Comparison of 1-day vs 3-day precipitation
floods_with_precip |>
filter(!is.na(precipitation_mm), !is.na(precipitation3_mm)) |>
ggplot(aes(x = precipitation_mm, y = precipitation3_mm)) +
geom_point(alpha = 0.6, color = "purple") +
geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "red") +
labs(
title = "1-Day vs 3-Day Precipitation",
x = "1-Day Precipitation (mm)",
y = "3-Day Precipitation (mm)",
subtitle = "Dashed line shows 1:1 relationship"
) +
theme_minimal()
# Precipitation vs discharge relationship (using 3-day totals)
plot_data <- floods_with_precip |>
filter(!is.na(precipitation3_mm), !is.na(max_flood_discharge))
# Sample for plotting if too many points
if (nrow(plot_data) > 1000) {
plot_data <- plot_data |> slice_sample(n = 1000)
}
plot_data |>
ggplot(aes(x = precipitation3_mm, y = max_flood_discharge)) +
geom_point(alpha = 0.6, color = "steelblue") +
geom_smooth(method = "lm", se = FALSE, color = "red") +
labs(
title = "3-Day Precipitation vs Maximum Flood Discharge",
x = "3-Day Precipitation (mm)",
y = "Maximum Flood Discharge"
) +
theme_minimal()
# Create plots subdirectory if it doesn't exist
if (!dir.exists("plots")) {
dir.create("plots")
}
# Function to create scatter plot for a single site
plot_site_precipitation <- function(site_data, site_number, station_name) {
# Filter out NA values and ensure we have data to plot
plot_data <- site_data |>
filter(!is.na(precipitation3_mm), !is.na(max_flood_discharge), !is.na(season))
if (nrow(plot_data) == 0) {
cat("No valid data for site", site_number, "-", station_name, "\n")
return(NULL)
}
# Create the plot
p <- plot_data |>
ggplot(aes(x = precipitation3_mm, y = max_flood_discharge, color = season)) +
geom_point(size = 2.5, alpha = 0.7) +
geom_smooth(method = "lm", se = TRUE, color = "black", linewidth = 1) +
#stat_poly_eq(aes(label = after_stat(eq.label)), parse = TRUE, color = "black", size = 3.5, label.x = 0.05, label.y = 0.95) +
scale_color_manual(
name = "Season",
values = c("Winter" = "#2166ac", "Spring" = "#5aae61", "Summer" = "#d73027", "Autumn" = "#762a83")
) +
scale_y_continuous(labels = comma_format()) +
labs(
title = paste0("Site ", site_number, ": ", station_name),
x = "3-Day Precipitation (mm)",
y = "Flood Discharge (cfs)",
subtitle = paste0("n = ", nrow(plot_data), " flood events")
) +
theme_minimal() +
theme(
plot.title = element_text(size = 12, face = "bold"),
plot.subtitle = element_text(size = 10),
legend.position = "right"
)
return(p)
}
#cat("Creating plots for", nrow(unique_sites), "unique sites...\n")
# Loop through each site and create plots
site_plots <- list()
for (i in 1:nrow(unique_sites)) {
current_site <- unique_sites$site_no[i]
current_name <- unique_sites$station_name[i]
# Filter data for current site
site_data <- floods_with_precip |>
filter(site_no == current_site)
# Create plot
plot_obj <- plot_site_precipitation(site_data, current_site, current_name)
if (!is.null(plot_obj)) {
# Store plot in list
site_plots[[current_site]] <- plot_obj
# Print plot
print(plot_obj)
# Optional: save individual plots to files
ggsave(
file.path("plots", paste0("site_", current_site, "_precipitation_discharge.png")),
plot_obj, width = 10, height = 6, dpi = 300)
}
#cat("Completed site", i, "of", nrow(unique_sites), "\n")
}
cat("All site plots completed!\n")
## All site plots completed!
#' Create temporal trends scatterplot for a single site
#'
#' This function creates a scatterplot showing how maximum annual flood
#' discharge varies over time. Each point represents a single flood event,
#' colored by season with an optional trend line.
#'
#' @param site_data A data frame containing flood events for a single site.
#' Must include columns: `year`, `max_flood_discharge`, `season`
#' @param site_number Character/factor. The site number for labeling
#' @param station_name Character/factor. The station name for labeling
#'
#' @return A ggplot object showing temporal trends, or NULL if insufficient data
#'
#' @details
#' **Plot Features:**
#' - Scatterplot with each point representing one flood event per year
#' - Points colored by season using consistent color scheme
#' - Optional trend line to show long-term patterns
#' - Y-axis formatted with comma separators for readability
#'
#' **Minimum Data Requirements:**
#' - Requires at least 10 flood events with valid years, discharges, and seasons
#'
plot_site_temporal_trends <- function(site_data, site_number, station_name) {
# Filter out NA values and ensure we have sufficient data
plot_data <- site_data |>
filter(!is.na(year), !is.na(max_flood_discharge), !is.na(season))
if (nrow(plot_data) < 10) {
cat("Insufficient data for site", site_number, "-", station_name, "(", nrow(plot_data), "events)\n")
return(NULL)
}
# Calculate year range for better x-axis scaling
year_range <- range(plot_data$year, na.rm = TRUE)
year_span <- diff(year_range)
# Create the plot
p <- plot_data |>
ggplot(aes(x = year, y = max_flood_discharge, color = season)) +
geom_point(size = 2.5, alpha = 0.7) +
geom_smooth(method = "lm", se = TRUE, color = "black", linewidth = 1) +
scale_color_manual(
name = "Season",
values = c("Winter" = "#2166ac", "Spring" = "#5aae61", "Summer" = "#d73027", "Autumn" = "#762a83")
) +
scale_x_continuous(
breaks = if (year_span <= 20) seq(year_range[1], year_range[2], by = 2) else waiver(),
minor_breaks = if (year_span <= 40) seq(year_range[1], year_range[2], by = 1) else waiver()
) +
scale_y_continuous(labels = comma_format()) +
labs(
title = paste0("Site ", site_number, ": ", station_name),
x = "Year",
y = "Flood Discharge (cfs)",
subtitle = paste0("Temporal trends in maximum annual floods (n = ", nrow(plot_data), " events)")
) +
theme_minimal() +
theme(
plot.title = element_text(size = 12, face = "bold"),
plot.subtitle = element_text(size = 10),
legend.position = "right",
panel.grid.minor.x = element_line(color = "grey90", linewidth = 0.3)
)
return(p)
}
#cat("Creating temporal trends plots for", nrow(unique_sites), "unique sites...\n")
# Loop through each site and create temporal trends plots
temporal_plots <- list()
for (i in 1:nrow(unique_sites)) {
current_site <- unique_sites$site_no[i]
current_name <- unique_sites$station_name[i]
# Filter data for current site
site_data <- floods_with_precip |>
filter(site_no == current_site)
# Create plot
plot_obj <- plot_site_temporal_trends(site_data, current_site, current_name)
if (!is.null(plot_obj)) {
# Store plot in list
temporal_plots[[current_site]] <- plot_obj
# Print plot
print(plot_obj)
# Save plot
ggsave(
file.path("plots", paste0("site_", current_site, "_temporal_trends.png")),
plot_obj, width = 10, height = 6, dpi = 300
)
}
#cat("Completed temporal trends plot", i, "of", nrow(unique_sites), "\n")
}
cat("All temporal trends plots completed!\n")
## All temporal trends plots completed!
#' Create monthly flood distribution bar plot for a single site
#'
#' This function creates a bar plot showing the distribution of maximum annual
#' flood events across calendar months. Each bar represents the count of flood
#' events occurring in that month, colored by season.
#'
#' @param site_data A data frame containing flood events for a single site.
#' Must include columns: `max_flood_month`, `season`
#' @param site_number Character/factor. The site number for labeling
#' @param station_name Character/factor. The station name for labeling
#'
#' @return A ggplot object showing monthly flood distribution, or NULL if insufficient data
#'
#' @details
#' **Plot Features:**
#' - Bar plot with months on x-axis and event counts on y-axis
#' - Bars colored by season using consistent color scheme
#' - All 12 months shown even if no events occurred
#' - Month abbreviations on x-axis for readability
#'
#' **Season Mapping:**
#' - Winter: December, January, February
#' - Spring: March, April, May
#' - Summer: June, July, August
#' - Autumn: September, October, November
#'
#' **Minimum Data Requirements:**
#' - Requires at least 5 flood events with valid months
#'
plot_site_monthly_distribution <- function(site_data, site_number, station_name) {
# Filter out NA values and ensure we have sufficient data
plot_data <- site_data |>
filter(!is.na(max_flood_month))
if (nrow(plot_data) < 5) {
cat("Insufficient data for site", site_number, "-", station_name, "(", nrow(plot_data), "events)\n")
return(NULL)
}
# Create complete month reference with season mapping
month_reference <- tibble(
month_num = 1:12,
month_name = month.abb,
season = case_when(
month_num %in% c(12, 1, 2) ~ "Winter",
month_num %in% c( 3, 4, 5) ~ "Spring",
month_num %in% c( 6, 7, 8) ~ "Summer",
month_num %in% c( 9, 10, 11) ~ "Autumn"
)
)
# Count events by month and join with complete month reference
monthly_counts <- plot_data |>
count(max_flood_month, name = "flood_count") |>
right_join(month_reference, by = c("max_flood_month" = "month_num")) |>
mutate(
flood_count = replace_na(flood_count, 0),
month_name = factor(month_name, levels = month.abb)
)
# Create the plot
p <- monthly_counts |>
ggplot(aes(x = month_name, y = flood_count, fill = season)) +
geom_col(alpha = 0.8, color = "white", linewidth = 0.3) +
scale_fill_manual(
name = "Season",
values = c("Winter" = "#2166ac", "Spring" = "#5aae61", "Summer" = "#d73027", "Autumn" = "#762a83")
) +
scale_y_continuous(
breaks = function(x) seq(0, max(x), by = max(1, ceiling(max(x)/8))),
expand = expansion(mult = c(0, 0.05))
) +
labs(
title = paste0("Site ", site_number, ": ", station_name),
x = "Month",
y = "Number of Flood Events",
subtitle = paste0("Monthly distribution of maximum annual floods (n = ", sum(monthly_counts$flood_count), " events)")
) +
theme_minimal() +
theme(
plot.title = element_text(size = 12, face = "bold"),
plot.subtitle = element_text(size = 10),
legend.position = "right",
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 0, hjust = 0.5)
)
return(p)
}
#cat("Creating monthly distribution plots for", nrow(unique_sites), "unique sites...\n")
# Loop through each site and create monthly distribution plots
monthly_plots <- list()
for (i in 1:nrow(unique_sites)) {
current_site <- unique_sites$site_no[i]
current_name <- unique_sites$station_name[i]
# Filter data for current site
site_data <- floods_with_precip |>
filter(site_no == current_site)
# Create plot
plot_obj <- plot_site_monthly_distribution(site_data, current_site, current_name)
if (!is.null(plot_obj)) {
# Store plot in list
monthly_plots[[current_site]] <- plot_obj
# Print plot
print(plot_obj)
# Save plot
ggsave(
file.path("plots", paste0("site_", current_site, "_monthly_distribution.png")),
plot_obj, width = 10, height = 6, dpi = 300
)
}
#cat("Completed monthly distribution plot", i, "of", nrow(unique_sites), "\n")
}
cat("All monthly distribution plots completed!\n")
## All monthly distribution plots completed!
# Function 2: Faceted scatter plots by season
plot_site_faceted <- function(site_data, site_number, station_name) {
# Filter out NA values and ensure we have data to plot
plot_data <- site_data |>
filter(!is.na(precipitation3_mm), !is.na(max_flood_discharge), !is.na(season))
if (nrow(plot_data) == 0) {
cat("No valid data for site", site_number, "-", station_name, "\n")
return(NULL)
}
# Calculate reasonable axis limits based on actual data
x_max <- max(plot_data$precipitation3_mm, na.rm = TRUE) * 1.05
y_max <- max(plot_data$max_flood_discharge, na.rm = TRUE) * 1.05
# Create the faceted scatter plot
p <- plot_data |>
ggplot(aes(x = precipitation3_mm, y = max_flood_discharge)) +
geom_point(aes(color = season), size = 2, alpha = 0.7) +
geom_smooth(method = "lm", se = TRUE, color = "black", linewidth = 0.8) +
facet_wrap(~season, scales = "fixed", ncol = 2) +
coord_cartesian(xlim = c(0, x_max), ylim = c(0, y_max)) + # Constrain axes
scale_color_manual(
name = "Season",
values = c("Winter" = "#2166ac", "Spring" = "#5aae61", "Summer" = "#d73027", "Autumn" = "#762a83")
) +
scale_y_continuous(labels = comma_format()) +
labs(
title = paste0("Site ", site_number, ": ", station_name),
x = "3-Day Precipitation (mm)",
y = "Flood Discharge (cfs)",
subtitle = paste0("Seasonal precipitation-discharge relationships (n = ", nrow(plot_data), " events)")
) +
theme_minimal() +
theme(
plot.title = element_text(size = 12, face = "bold"),
plot.subtitle = element_text(size = 10),
legend.position = "none",
strip.text = element_text(face = "bold")
)
return(p)
}
# Loop 2: Create faceted scatter plots for each site
#cat("Creating faceted scatter plots for", nrow(unique_sites), "unique sites...\n")
faceted_plots <- list()
for (i in 1:nrow(unique_sites)) {
current_site <- unique_sites$site_no[i]
current_name <- unique_sites$station_name[i]
site_data <- floods_with_precip |>
filter(site_no == current_site)
plot_obj <- plot_site_faceted(site_data, current_site, current_name)
if (!is.null(plot_obj)) {
faceted_plots[[current_site]] <- plot_obj
print(plot_obj)
# Save plot
ggsave(
file.path("plots", paste0("site_", current_site, "_faceted_seasonal.png")),
plot_obj, width = 10, height = 8, dpi = 300)
}
#cat("Completed faceted plot", i, "of", nrow(unique_sites), "\n")
}
# Function 3: Ridge plots of discharge by season
plot_site_ridges <- function(site_data, site_number, station_name) {
# Filter out NA values and ensure we have data to plot
plot_data <- site_data |>
filter(!is.na(max_flood_discharge), !is.na(season))
if (nrow(plot_data) == 0) {
cat("No valid data for site", site_number, "-", station_name, "\n")
return(NULL)
}
# Create the ridge plot
p <- plot_data |>
ggplot(aes(x = max_flood_discharge, y = season, fill = season)) +
geom_density_ridges(alpha = 0.7, scale = 1.2, rel_min_height = 0.01) +
scale_fill_manual(
name = "Season",
values = c("Winter" = "#2166ac", "Spring" = "#5aae61", "Summer" = "#d73027", "Autumn" = "#762a83")
) +
scale_x_continuous(labels = comma_format()) +
labs(
title = paste0("Site ", site_number, ": ", station_name),
x = "Flood Discharge (cfs)",
y = "Season",
subtitle = paste0("Seasonal flood discharge distributions (n = ", nrow(plot_data), " events)")
) +
theme_minimal() +
theme(
plot.title = element_text(size = 12, face = "bold"),
plot.subtitle = element_text(size = 10),
legend.position = "none"
)
return(p)
}
# Loop 3: Create ridge plots for each site
#cat("Creating ridge plots for", nrow(unique_sites), "unique sites...\n")
ridge_plots <- list()
for (i in 1:nrow(unique_sites)) {
current_site <- unique_sites$site_no[i]
current_name <- unique_sites$station_name[i]
site_data <- floods_with_precip |>
filter(site_no == current_site)
plot_obj <- plot_site_ridges(site_data, current_site, current_name)
if (!is.null(plot_obj)) {
ridge_plots[[current_site]] <- plot_obj
print(plot_obj)
# Save plot
ggsave(
file.path("plots", paste0("site_", current_site, "_ridges_seasonal.png")),
plot_obj, width = 8, height = 6, dpi = 300)
}
#cat("Completed ridge plot", i, "of", nrow(unique_sites), "\n")
}
#' Create day-of-year flood discharge scatterplot for a single site
#'
#' This function creates a scatterplot showing the timing and magnitude of
#' all annual maximum flood events throughout the calendar year. Each point
#' represents a single flood event, colored by season.
#'
#' @param site_data A data frame containing flood events for a single site.
#' Must include columns: `max_flood_date`, `max_flood_discharge`, `season`
#' @param site_number Character/factor. The site number for labeling
#' @param station_name Character/factor. The station name for labeling
#'
#' @return A ggplot object showing day-of-year flood patterns, or NULL if insufficient data
#'
#' @details
#' **Data Processing:**
#' - Converts February 29 dates to February 28 for consistent 365-day year
#' - Calculates day-of-year (1-365) for each flood event
#' - Uses existing season classifications for color coding
#'
#' **Plot Features:**
#' - Scatterplot with each point representing one flood event
#' - Points colored by season using consistent color scheme
#' - Seasonal background shading for visual reference
#' - Logarithmic y-axis to handle wide discharge ranges
#'
#' **Minimum Data Requirements:**
#' - Requires at least 10 flood events with valid dates and discharges
#'
#' @examples
#' \dontrun{
#' # Example site_data structure:
#' site_floods <- data.frame(
#' max_flood_date = as.Date(c("2020-03-15", "2021-07-22", "2022-12-01")),
#' max_flood_discharge = c(15000, 8500, 12000),
#' season = c("Spring", "Summer", "Winter")
#' )
#'
#' # Create day-of-year scatterplot
#' plot <- plot_site_day_of_year(site_floods, "12345678", "Example River")
#' }
#'
plot_site_day_of_year <- function(site_data, site_number, station_name) {
# Filter out NA values and ensure we have sufficient data
plot_data <- site_data |>
filter(!is.na(max_flood_date), !is.na(max_flood_discharge), !is.na(season))
if (nrow(plot_data) < 10) {
cat("Insufficient data for site", site_number, "-", station_name, "(", nrow(plot_data), "events)\n")
return(NULL)
}
# Convert dates to day-of-year, handling leap years
plot_data <- plot_data |>
mutate(
# Convert Feb 29 to Feb 28 for consistency
adjusted_date = if_else(
month(max_flood_date) == 2 & day(max_flood_date) == 29,
as.Date(paste0(year(max_flood_date), "-02-28")),
max_flood_date
),
# Calculate day of year (1-365) and create date labels for x-axis
day_of_year = as.numeric(format(adjusted_date, "%j")),
date_label = as.Date(paste0("2020-01-01")) + days(day_of_year - 1)
)
# Define seasonal boundaries for background shading
seasonal_bounds <- tribble(
~season, ~start_day, ~end_day, ~color,
"Winter", 1, 59, "#E6F3FF", # Dec 21 - Feb 28 (approx)
"Spring", 60, 151, "#E6FFE6", # Mar 1 - May 31 (approx)
"Summer", 152, 243, "#FFE6E6", # Jun 1 - Aug 31 (approx)
"Autumn", 244, 334, "#FFF0E6", # Sep 1 - Nov 30 (approx)
"Winter", 335, 365, "#E6F3FF" # Dec 1 - Dec 31 (approx)
)
# Create the plot
p <- ggplot(plot_data, aes(x = date_label)) +
# Add seasonal background shading
geom_rect(
data = seasonal_bounds,
aes(xmin = as.Date("2020-01-01") + days(start_day - 1),
xmax = as.Date("2020-01-01") + days(end_day - 1),
ymin = -Inf, ymax = Inf),
fill = seasonal_bounds$color,
alpha = 0.3,
inherit.aes = FALSE
) +
# Scatterplot points colored by season
geom_point(aes(y = max_flood_discharge, color = season),
size = 2.5, alpha = 0.7) +
scale_color_manual(
name = "Season",
values = c("Winter" = "#2166ac", "Spring" = "#5aae61", "Summer" = "#d73027", "Autumn" = "#762a83")
) +
scale_x_date(
date_labels = "%b",
date_breaks = "1 month",
expand = c(0.01, 0.01)
) +
scale_y_log10(
labels = scales::comma_format(),
expand = c(0.02, 0.02)
) +
labs(
title = paste0("Site ", site_number, ": ", station_name),
subtitle = paste0("Seasonal timing of maximum annual floods (n = ", nrow(plot_data), " events)"),
x = "Month",
y = "Flood Discharge (cfs, log scale)"
) +
theme_minimal() +
theme(
plot.title = element_text(size = 12, face = "bold"),
plot.subtitle = element_text(size = 10),
legend.position = "right",
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 0, hjust = 0.5)
)
return(p)
}
#cat("Creating day-of-year analysis plots for", nrow(unique_sites), "unique sites...\n")
# Loop through each site and create day-of-year plots
day_of_year_plots <- list()
for (i in 1:nrow(unique_sites)) {
current_site <- unique_sites$site_no[i]
current_name <- unique_sites$station_name[i]
# Filter data for current site
site_data <- floods_with_precip |>
filter(site_no == current_site)
# Create plot
plot_obj <- plot_site_day_of_year(site_data, current_site, current_name)
if (!is.null(plot_obj)) {
# Store plot in list
day_of_year_plots[[current_site]] <- plot_obj
# Print plot
print(plot_obj)
# Save plot
ggsave(
file.path("plots", paste0("site_", current_site, "_day_of_year_analysis.png")),
plot_obj, width = 12, height = 8, dpi = 300
)
}
#cat("Completed day-of-year analysis", i, "of", nrow(unique_sites), "\n")
}