knitr::opts_chunk$set(
echo = TRUE,
message = FALSE,
warning = FALSE,
fig.width = 9,
fig.height = 5.5,
dpi = 300
)
library(terra)
library(sf)
library(dplyr)
library(tidyr)
library(ggplot2)
library(readr)
library(janitor)
library(stringr)
library(lubridate)
# Optional shoreline / water-context pull from TIGER.
has_tigris <- requireNamespace("tigris", quietly = TRUE)
if (has_tigris) {
options(tigris_use_cache = TRUE)
}
dir.create("figures", showWarnings = FALSE)
dir.create("outputs", showWarnings = FALSE)
dir.create("outputs/rasters", recursive = TRUE, showWarnings = FALSE)
dir.create("outputs/tables", recursive = TRUE, showWarnings = FALSE)
The recent collapse of Peconic bay scallops provides the immediate ecological context for this analysis. Beginning in 2019, adult scallops experienced major die-offs in the Peconic Bays, despite prior restoration activity and continuing juvenile recruitment. The Peconic Estuary Partnership convened a technical review committee after the 2019 adult scallop die-off to assess possible drivers, and NYSDEC’s species assessment links the collapse to interacting stressors including high summer water temperatures, low dissolved oxygen, and a coccidian parasite. The cause of the crash is still not resolved as a single-variable event, so the analysis below treats the estuary as a coupled physical and water-quality system rather than as a scallop-yield model.
Peconic Bay scallop landings context from the Peconic bay report.
The study window is treated as a recent 2014–2025 snapshot: 2014 provides a useful decadal reference point, 2019–2020 marks the recent scallop-collapse interval, and 2024–2025 provides the current comparison window.
The Peconic Estuary sits between the two forks of Long Island, with shallow embayments near Riverhead opening eastward toward Great Peconic Bay, Gardiners Bay, and the Atlantic-facing exchange system. The system is generally shallow, especially across the western Peconic and nearshore embayments, so small differences in bottom depth and slope may matter for light availability, local exchange, and water-column stress. I treat bathymetry as the fixed physical template: once a working bottom surface is built, the same depth and slope classes can be used as spatial bins for later chlorophyll-a, dissolved oxygen, nitrogen, temperature, eelgrass, or shellfish-management comparisons.
The ecological motivation is the recent Peconic bay scallop collapse layered on top of a longer brown-tide and eutrophication history. The working assumption is modest: shallow flats, deeper basins, and sharper bathymetric gradients may differ in residence time, exchange, light exposure, and susceptibility to algal or low-oxygen conditions. The structure is a simplified raster proxy, drawing on the general idea that estuarine bathymetry can condition exchange and residual circulation.
The first half of the workflow builds the bathymetric template. The second half adds the Peconic nitrogen loading model as a static watershed-pressure layer and attaches that layer to SCDHS water-quality stations through a nearest-subwatershed rule. The resulting station covariate table gives each station a fixed spatial context for later DO, chlorophyll-a, Secchi, nutrient, and temperature summaries.
The bathymetry layer combines two finished NOAA raster DEM products:
For the initial data cleaning, I used GDAL outside R to download and agglomerate the NOAA 2023 DEM tiles from the VRT. The 2023 topobathymetric product was clipped to the Peconic Estuary Program boundary and resampled to a 10 m working grid because near-shoreline features are still important here, while the full native-resolution DEM is unnecessarily heavy for this analysis. The Gardiners Bay M030 30 m DEM was then brought into the same grid. In R, both products are converted to positive water depth by retaining only cells below 0 m. The final depth layer keeps the 2023 topobathymetry wherever measured coverage exists, then fills remaining bathymetric cells from M030.
This chunk documents the terminal-side preprocessing. It is left unevaluated because the downloaded and clipped products already exist locally.
# From the project root:
cd ~/Hunter/S26/GTECH-385/final-project
# 1. Build / use the NOAA 2023 DEM VRT and clip to the PEP boundary at 10 m.
# The source VRT was created from downloaded NOAA 10274 DEM tiles.
gdalwarp \
-overwrite \
-multi \
-wo NUM_THREADS=ALL_CPUS \
-wm 1024 \
-cutline data/Peconic_Estuary_Program_Boundary_EPSG6347.geojson \
-crop_to_cutline \
-tr 10 10 \
-tap \
-r bilinear \
-dstnodata -999999 \
-co COMPRESS=LZW \
-co TILED=YES \
-co BIGTIFF=IF_SAFER \
data/dem/noaa_10274_downloaded_tiles.vrt \
data/dem/peconic_dem_10m_boundary_10274.tif
# 2. Download Gardiners Bay M030 30 m bathymetry.
wget -O data/dem/gardiners_bay_m030_30m.nc \
"https://www.ngdc.noaa.gov/thredds/fileServer/regional/gardiners_bay_m030_30m.nc"
paths <- list(
dem_2023 = "data/dem/peconic_dem_10m_boundary_10274.tif",
dem_m030 = "data/dem/gardiners_bay_m030_30m.nc",
pep_boundary = "data/Peconic_Estuary_Program_Boundary.geojson"
)
dem23 <- rast(paths$dem_2023)
m030_raw <- rast(paths$dem_m030)
pep_boundary <- st_read(paths$pep_boundary, quiet = TRUE)
# Project the PEP boundary to the 2023 DEM CRS for masking and plotting.
pep_6347 <- st_transform(pep_boundary, crs(dem23))
pep_v <- vect(pep_6347)
dem23
## class : SpatRaster
## size : 3807, 8603, 1 (nrow, ncol, nlyr)
## resolution : 10, 10 (x, y)
## extent : 678110, 764140, 4525640, 4563710 (xmin, xmax, ymin, ymax)
## coord. ref. : NAD83(2011) / UTM zone 18N (EPSG:6347)
## source : peconic_dem_10m_boundary_10274.tif
## name : peconic_dem_10m_boundary_10274
## min value : -13.31304
## max value : 55.06969
m030_raw
## class : SpatRaster
## size : 1010, 2064, 1 (nrow, ncol, nlyr)
## resolution : 30, 30 (x, y)
## extent : 698199.6, 760119.6, 4530077, 4560377 (xmin, xmax, ymin, ymax)
## coord. ref. : NAD83 / UTM zone 18N (EPSG:26918)
## source : gardiners_bay_m030_30m.nc
## varname : Band1 (GDAL Band Number 1)
## name : Band1
# TIGER area-water polygons are only used for visual context.
shoreline_ctx <- NULL
if (has_tigris) {
shoreline_ctx <- tryCatch({
suffolk_water <- tigris::area_water(state = "NY", county = "Suffolk", class = "sf")
nassau_water <- tigris::area_water(state = "NY", county = "Nassau", class = "sf")
bind_rows(suffolk_water, nassau_water) |>
st_transform(st_crs(pep_6347)) |>
st_crop(st_bbox(pep_6347))
}, error = function(e) NULL)
}
if (is.null(shoreline_ctx)) {
message("No TIGER area-water shoreline context loaded; using PEP boundary only.")
}
dem_ranges <- tibble(
raster = c("2023 topobathymetric DEM", "M030 Gardiners Bay DEM"),
min_elevation_m = c(
as.numeric(global(dem23, min, na.rm = TRUE)[1, 1]),
as.numeric(global(m030_raw, min, na.rm = TRUE)[1, 1])
),
max_elevation_m = c(
as.numeric(global(dem23, max, na.rm = TRUE)[1, 1]),
as.numeric(global(m030_raw, max, na.rm = TRUE)[1, 1])
)
)
knitr::kable(dem_ranges, digits = 2)
| raster | min_elevation_m | max_elevation_m |
|---|---|---|
| 2023 topobathymetric DEM | -13.31 | 55.07 |
| M030 Gardiners Bay DEM | -29.60 | 0.87 |
# Mask the 2023 product to the PEP boundary. It is already clipped, but this keeps the step explicit.
dem23_clip <- mask(crop(dem23, pep_v), pep_v)
# Project M030 onto the 2023 10 m working grid, then mask to the same PEP boundary.
m030_10m <- project(m030_raw, dem23_clip, method = "bilinear")
m030_clip <- mask(crop(m030_10m, pep_v), pep_v)
# Convert measured below-zero elevations to positive water depth.
# Cells >= 0 are land/upland or above vertical datum and are removed from bathymetric analysis.
depth23 <- abs(ifel(dem23_clip < 0, dem23_clip, NA))
depth_m030 <- abs(ifel(m030_clip < 0, m030_clip, NA))
names(depth23) <- "depth_2023_m"
names(depth_m030) <- "depth_m030_m"
# Keep 2023 topobathy wherever it exists; fill remaining bathymetric cells from M030.
depth_final <- cover(depth23, depth_m030)
names(depth_final) <- "depth_m"
# Track source. This is used for interpretation, not as a response variable.
depth_source <- ifel(!is.na(depth23), 1, ifel(!is.na(depth_m030), 2, NA))
names(depth_source) <- "depth_source"
writeRaster(depth23, "outputs/rasters/depth_2023_topobathy_measured_10m.tif", overwrite = TRUE)
writeRaster(depth_m030, "outputs/rasters/depth_m030_gardiners_resampled_10m.tif", overwrite = TRUE)
writeRaster(depth_final, "outputs/rasters/peconic_depth_10m_2023_with_m030_base.tif", overwrite = TRUE)
writeRaster(depth_source, "outputs/rasters/peconic_depth_source_2023_vs_m030.tif", overwrite = TRUE)
depth_stats <- tibble(
raster = c("2023 measured topobathy", "M030 Gardiners Bay base", "Combined depth layer"),
min_depth_m = c(
as.numeric(global(depth23, min, na.rm = TRUE)[1, 1]),
as.numeric(global(depth_m030, min, na.rm = TRUE)[1, 1]),
as.numeric(global(depth_final, min, na.rm = TRUE)[1, 1])
),
max_depth_m = c(
as.numeric(global(depth23, max, na.rm = TRUE)[1, 1]),
as.numeric(global(depth_m030, max, na.rm = TRUE)[1, 1]),
as.numeric(global(depth_final, max, na.rm = TRUE)[1, 1])
),
mean_depth_m = c(
as.numeric(global(depth23, mean, na.rm = TRUE)[1, 1]),
as.numeric(global(depth_m030, mean, na.rm = TRUE)[1, 1]),
as.numeric(global(depth_final, mean, na.rm = TRUE)[1, 1])
),
valid_cells = c(
as.numeric(global(!is.na(depth23), sum, na.rm = TRUE)[1, 1]),
as.numeric(global(!is.na(depth_m030), sum, na.rm = TRUE)[1, 1]),
as.numeric(global(!is.na(depth_final), sum, na.rm = TRUE)[1, 1])
)
)
write_csv(depth_stats, "outputs/tables/depth_stats_2023_m030_combined.csv")
knitr::kable(depth_stats, digits = 2)
| raster | min_depth_m | max_depth_m | mean_depth_m | valid_cells |
|---|---|---|---|---|
| 2023 measured topobathy | 0 | 13.31 | 4.48 | 3268954 |
| M030 Gardiners Bay base | 0 | 29.54 | 7.31 | 4838533 |
| Combined depth layer | 0 | 29.54 | 7.17 | 5466411 |
source_key <- tibble(
depth_source = c(1, 2),
source_label = c("2023 NOAA topobathy", "M030 Gardiners Bay 30 m bathymetry")
)
write_csv(source_key, "outputs/tables/depth_source_key_2023_m030.csv")
knitr::kable(source_key)
| depth_source | source_label |
|---|---|
| 1 | 2023 NOAA topobathy |
| 2 | M030 Gardiners Bay 30 m bathymetry |
sample_raster_df <- function(r, value_name, size = 150000, method = "regular") {
df <- terra::spatSample(
r,
size = size,
method = method,
as.df = TRUE,
xy = TRUE,
na.rm = TRUE
) |>
as_tibble(.name_repair = "minimal")
df <- df[, 1:3]
names(df) <- c("x", "y", value_name)
df
}
mode_numeric <- function(x, ...) {
x <- x[!is.na(x)]
if (length(x) == 0) return(NA_real_)
ux <- unique(x)
ux[which.max(tabulate(match(x, ux)))]
}
make_source_outline <- function(source_raster, fact = 20) {
src_lowres <- terra::aggregate(source_raster, fact = fact, fun = mode_numeric, na.rm = TRUE)
src_poly <- as.polygons(src_lowres, dissolve = TRUE, na.rm = TRUE)
st_as_sf(src_poly) |>
mutate(
depth_source = factor(
depth_source,
levels = c(1, 2),
labels = c("2023 topobathy", "M030 Gardiners Bay")
)
) |>
filter(!is.na(depth_source))
}
depth_vals <- values(depth_final, na.rm = TRUE)
depth_98 <- as.numeric(quantile(depth_vals, 0.98, na.rm = TRUE))
depth_plot <- clamp(depth_final, 0, depth_98, values = TRUE)
depth_df <- sample_raster_df(depth_plot, "depth_m", size = 180000)
source_outline <- make_source_outline(depth_source, fact = 25)
p_depth <- ggplot() +
geom_raster(data = depth_df, aes(x = x, y = y, fill = depth_m)) +
{if (!is.null(shoreline_ctx)) geom_sf(data = shoreline_ctx, fill = NA, color = "grey35", linewidth = 0.18, inherit.aes = FALSE)} +
geom_sf(data = pep_6347, fill = NA, color = "black", linewidth = 0.35, inherit.aes = FALSE) +
geom_sf(data = source_outline, aes(color = depth_source), fill = NA, linewidth = 0.45, inherit.aes = FALSE) +
coord_sf(crs = st_crs(pep_6347), expand = FALSE) +
scale_fill_gradient(low = "#d9f0ff", high = "#08306b", name = "Depth (m)", limits = c(0, depth_98)) +
scale_color_manual(
values = c("2023 topobathy" = "steelblue4", "M030 Gardiners Bay" = "orange3"),
name = "Depth source outline"
) +
labs(
title = "Figure 1. Combined Peconic bathymetry layer",
subtitle = paste0("2023 topobathy retained where available; M030 fills broader bathymetry. Ramp capped at 98th percentile = ", round(depth_98, 2), " m."),
x = NULL,
y = NULL,
caption = "Blue/orange outlines mark the approximate source footprint used in the splice. TIGER area-water boundaries, when available, are drawn only as visual shoreline context."
) +
theme_minimal() +
theme(legend.position = "bottom")
print(p_depth)
ggsave("figures/figure_01_combined_peconic_bathymetry.png", p_depth, width = 9, height = 5.5, dpi = 300)
depth_hist <- tibble(depth_m = depth_vals)
p_depth_hist <- ggplot(depth_hist, aes(x = depth_m)) +
geom_histogram(bins = 70, fill = "steelblue4", color = "white") +
labs(
title = "Figure 2. Combined depth distribution",
subtitle = "Distribution after retaining 2023 topobathymetry and filling remaining bathymetry from M030",
x = "Depth (m)",
y = "Raster cell count"
) +
theme_minimal()
print(p_depth_hist)
ggsave("figures/figure_02_combined_depth_distribution.png", p_depth_hist, width = 7.5, height = 4.5, dpi = 300)
The final depth layer is used to derive slope, then slope is split into relative classes. I use relative categories here because the aim is to separate broad bathymetric regimes inside this estuary, rather than impose a universal slope threshold. Low-slope cells approximate flats and broad shallow shelves. High-slope cells identify sharper gradients, channel edges, and places where bathymetry may be more relevant to exchange or mixing proxies. These classes will later become the fixed spatial bins for chlorophyll-a, SST-change, dissolved oxygen, nitrogen, or eelgrass comparisons.
slope <- terrain(depth_final, v = "slope", unit = "degrees")
names(slope) <- "slope_deg"
writeRaster(slope, "outputs/rasters/peconic_slope_deg_10m.tif", overwrite = TRUE)
slope_vals <- values(slope, na.rm = TRUE)
slope_99 <- as.numeric(quantile(slope_vals, 0.99, na.rm = TRUE))
slope_plot <- clamp(slope, 0, slope_99, values = TRUE)
slope_df <- sample_raster_df(slope_plot, "slope_deg", size = 180000)
p_slope <- ggplot() +
geom_raster(data = slope_df, aes(x = x, y = y, fill = slope_deg)) +
{if (!is.null(shoreline_ctx)) geom_sf(data = shoreline_ctx, fill = NA, color = "grey35", linewidth = 0.18, inherit.aes = FALSE)} +
geom_sf(data = pep_6347, fill = NA, color = "black", linewidth = 0.35, inherit.aes = FALSE) +
coord_sf(crs = st_crs(pep_6347), expand = FALSE) +
scale_fill_gradient(low = "#f7fbff", high = "#08519c", name = "Slope (°)", limits = c(0, slope_99)) +
labs(
title = "Figure 3. Bathymetric slope from combined depth layer",
subtitle = paste0("Slope ramp capped at the 99th percentile = ", round(slope_99, 2), "° for display."),
x = NULL,
y = NULL
) +
theme_minimal() +
theme(legend.position = "bottom")
print(p_slope)
ggsave("figures/figure_03_bathymetric_slope.png", p_slope, width = 9, height = 5.5, dpi = 300)
# Coarse depth classes used as physical proxy bins.
depth_class <- classify(
depth_final,
rcl = matrix(
c(
0, 3, 1,
3, 8, 2,
8, Inf, 3
),
ncol = 3,
byrow = TRUE
)
)
names(depth_class) <- "depth_class"
depth_key <- tibble(
depth_class = c(1, 2, 3),
depth_label = c("shallow_0_3m", "mid_3_8m", "deep_gt_8m")
)
# Relative slope classes: terciles of the observed slope distribution.
slope_q <- quantile(slope_vals, probs = c(0, 1/3, 2/3, 1), na.rm = TRUE)
slope_class <- classify(
slope,
rcl = matrix(
c(
slope_q[1], slope_q[2], 1,
slope_q[2], slope_q[3], 2,
slope_q[3], slope_q[4], 3
),
ncol = 3,
byrow = TRUE
)
)
names(slope_class) <- "slope_class"
slope_key <- tibble(
slope_class = c(1, 2, 3),
slope_label = c("low_slope", "moderate_slope", "high_slope"),
lower_deg = as.numeric(slope_q[1:3]),
upper_deg = as.numeric(slope_q[2:4])
)
# Cross depth class with slope class to make combined bathymetric regimes.
bathy_regime <- depth_class * 10 + slope_class
names(bathy_regime) <- "bathy_regime"
bathy_key <- expand_grid(
depth_class = c(1, 2, 3),
slope_class = c(1, 2, 3)
) |>
left_join(depth_key, by = "depth_class") |>
left_join(slope_key |> select(slope_class, slope_label), by = "slope_class") |>
mutate(
bathy_regime = depth_class * 10 + slope_class,
bathy_label = paste(depth_label, slope_label, sep = " / ")
)
writeRaster(depth_class, "outputs/rasters/peconic_depth_class_10m.tif", overwrite = TRUE)
writeRaster(slope_class, "outputs/rasters/peconic_slope_class_10m.tif", overwrite = TRUE)
writeRaster(bathy_regime, "outputs/rasters/peconic_bathymetric_regime_10m.tif", overwrite = TRUE)
write_csv(depth_key, "outputs/tables/depth_class_key.csv")
write_csv(slope_key, "outputs/tables/slope_class_key.csv")
write_csv(bathy_key, "outputs/tables/bathymetric_regime_key.csv")
knitr::kable(slope_key, digits = 4)
| slope_class | slope_label | lower_deg | upper_deg |
|---|---|---|---|
| 1 | low_slope | 0.0000 | 0.1691 |
| 2 | moderate_slope | 0.1691 | 0.4715 |
| 3 | high_slope | 0.4715 | 41.8144 |
knitr::kable(bathy_key)
| depth_class | slope_class | depth_label | slope_label | bathy_regime | bathy_label |
|---|---|---|---|---|---|
| 1 | 1 | shallow_0_3m | low_slope | 11 | shallow_0_3m / low_slope |
| 1 | 2 | shallow_0_3m | moderate_slope | 12 | shallow_0_3m / moderate_slope |
| 1 | 3 | shallow_0_3m | high_slope | 13 | shallow_0_3m / high_slope |
| 2 | 1 | mid_3_8m | low_slope | 21 | mid_3_8m / low_slope |
| 2 | 2 | mid_3_8m | moderate_slope | 22 | mid_3_8m / moderate_slope |
| 2 | 3 | mid_3_8m | high_slope | 23 | mid_3_8m / high_slope |
| 3 | 1 | deep_gt_8m | low_slope | 31 | deep_gt_8m / low_slope |
| 3 | 2 | deep_gt_8m | moderate_slope | 32 | deep_gt_8m / moderate_slope |
| 3 | 3 | deep_gt_8m | high_slope | 33 | deep_gt_8m / high_slope |
bathy_df <- sample_raster_df(bathy_regime, "bathy_regime", size = 180000) |>
mutate(bathy_regime = as.numeric(bathy_regime)) |>
left_join(bathy_key, by = "bathy_regime")
regime_cols <- c(
"#deebf7", "#c6dbef", "#9ecae1",
"#6baed6", "#4292c6", "#2171b5",
"#08519c", "#08306b", "#041f4a"
)
names(regime_cols) <- unique(bathy_key$bathy_label)
p_regime <- ggplot() +
geom_raster(data = bathy_df, aes(x = x, y = y, fill = bathy_label)) +
{if (!is.null(shoreline_ctx)) geom_sf(data = shoreline_ctx, fill = NA, color = "grey35", linewidth = 0.18, inherit.aes = FALSE)} +
geom_sf(data = pep_6347, fill = NA, color = "black", linewidth = 0.35, inherit.aes = FALSE) +
coord_sf(crs = st_crs(pep_6347), expand = FALSE) +
scale_fill_manual(values = regime_cols, name = "Bathymetric regime") +
labs(
title = "Figure 4. Bathymetric proxy regimes",
subtitle = "Depth classes crossed with relative slope classes; used later as fixed spatial bins for water-quality proxies.",
x = NULL,
y = NULL
) +
theme_minimal() +
theme(
legend.position = "bottom",
legend.text = element_text(size = 7)
)
print(p_regime)
ggsave("figures/figure_04_bathymetric_proxy_regimes.png", p_regime, width = 9, height = 5.5, dpi = 300)
The next spatial layer is the Peconic Estuary nitrogen loading model. I treat it as a static watershed-pressure layer over the 2014–2024 analysis window. Land use, wastewater inputs, and fertilizer sources can change through time, so this layer is not an annual concentration record. Its use here is narrower: it provides a subwatershed-scale gradient of modeled land-based nitrogen pressure that can be attached to monitoring stations.
The Peconic nitrogen loading model contains 43 subwatersheds and
reports source categories including atmospheric deposition, wastewater
treatment facilities, septic systems, agriculture, lawns, and parks/golf
courses. The key field is kg_N_per_year_per_Ha, a yield
normalized by subwatershed area. Total is retained as the
absolute modeled load.
A marine station cannot be assigned to a terrestrial subwatershed by containment unless the station actually falls inside the polygon. Most water-quality stations sit in open water, so polygon intersection would produce missing values or misleading assignments. I therefore use a screening-level linkage rule: each station inherits the nitrogen yield of the nearest modeled subwatershed, and the station-to-subwatershed distance is retained. This is a shoreline-proximity association, not hydrodynamic transport. For visual context, I also extend the subwatershed influence zones across the estuary with a nearest-polygon/Voronoi-style partition clipped to the Peconic boundary.
The station layer becomes the limiting geometry from here forward. Each SCDHS station is assigned fixed spatial covariates: bathymetric depth, slope/regime, nearest nitrogen yield, and nearest-subwatershed distance. Later DO, chlorophyll-a, Secchi, temperature, salinity, and nutrient observations can be summarized by station-year and joined to this table.
The ArcGIS web-map metadata exposes the usable feature layer endpoint below. The shell chunk is kept for reproducibility, but the R workflow uses the local GeoPackage if it exists.
mkdir -p data/nitrogen
curl -L -G \
"https://services.arcgis.com/JsDD4qdG5r2a7hR5/arcgis/rest/services/NLM/FeatureServer/0/query" \
--data-urlencode "where=1=1" \
--data-urlencode "outFields=*" \
--data-urlencode "returnGeometry=true" \
--data-urlencode "f=geojson" \
-o data/nitrogen/peconic_nlm_subwatersheds.geojson
ogrinfo -so data/nitrogen/peconic_nlm_subwatersheds.geojson
ogr2ogr \
-f GPKG \
data/nitrogen/peconic_nlm_subwatersheds.gpkg \
data/nitrogen/peconic_nlm_subwatersheds.geojson
nlm_path <- "data/nitrogen/peconic_nlm_subwatersheds.gpkg"
nlm_geojson_path <- "data/nitrogen/peconic_nlm_subwatersheds.geojson"
nlm_url <- paste0(
"https://services.arcgis.com/JsDD4qdG5r2a7hR5/arcgis/rest/services/",
"NLM/FeatureServer/0/query?",
"where=1%3D1&outFields=*&returnGeometry=true&f=geojson"
)
if (file.exists(nlm_path)) {
nlm <- st_read(nlm_path, quiet = TRUE)
} else if (file.exists(nlm_geojson_path)) {
nlm <- st_read(nlm_geojson_path, quiet = TRUE)
} else {
nlm <- st_read(nlm_url, quiet = TRUE)
}
nlm <- nlm |>
st_make_valid() |>
st_transform(st_crs(pep_6347))
# Save a stable local copy for later runs.
st_write(nlm, "data/nitrogen/peconic_nlm_subwatersheds.gpkg", delete_dsn = TRUE, quiet = TRUE)
# Build a visual-only nearest-subwatershed influence surface across the Peconic boundary.
# This does not model transport; it only shows which terrestrial NLM polygon is nearest
# to each part of the estuary frame.
nlm_pts <- st_point_on_surface(nlm)
nlm_voronoi_geom <- st_voronoi(st_union(st_geometry(nlm_pts)), envelope = st_as_sfc(st_bbox(pep_6347))) |>
st_collection_extract("POLYGON")
nlm_voronoi <- st_as_sf(nlm_voronoi_geom) |>
st_set_crs(st_crs(pep_6347)) |>
st_make_valid() |>
st_intersection(pep_6347)
nlm_voronoi_idx <- st_nearest_feature(st_centroid(nlm_voronoi), nlm_pts)
nlm_voronoi <- bind_cols(
nlm_voronoi,
nlm |>
st_drop_geometry() |>
select(subwatershed_code, kg_N_per_year_per_Ha) |>
slice(nlm_voronoi_idx)
)
nlm_summary <- nlm |>
st_drop_geometry() |>
select(
subwatershed_code,
atmospheric,
wwtf,
septic,
agriculture,
lawns,
parks_golf,
Total,
kg_N_per_year_per_Ha,
PercentofTotal
) |>
arrange(desc(kg_N_per_year_per_Ha))
write_csv(nlm_summary, "outputs/tables/nitrogen_loading_subwatershed_summary.csv")
knitr::kable(head(nlm_summary, 12), digits = 2)
| subwatershed_code | atmospheric | wwtf | septic | agriculture | lawns | parks_golf | Total | kg_N_per_year_per_Ha | PercentofTotal |
|---|---|---|---|---|---|---|---|---|---|
| NF0 | 2191.16 | 0.00 | 5778.99 | 10199.53 | 620.17 | 841.71 | 19631.57 | 13.47 | 7.35 |
| SF8 | 186.41 | 0.00 | 1138.94 | 0.00 | 131.21 | 0.00 | 1456.56 | 12.08 | 0.55 |
| NF15 | 1669.06 | 0.00 | 4409.92 | 4911.54 | 470.05 | 736.34 | 12196.92 | 10.83 | 4.57 |
| NF14 | 1523.93 | 0.00 | 4345.45 | 4566.14 | 460.60 | 2.87 | 10898.98 | 10.80 | 4.08 |
| NF6 | 278.92 | 0.00 | 957.58 | 623.64 | 117.41 | 0.00 | 1977.56 | 10.51 | 0.74 |
| NF12 | 96.46 | 0.00 | 526.22 | 0.00 | 51.62 | 0.00 | 674.29 | 10.49 | 0.25 |
| NF8 | 1703.26 | 0.00 | 6009.13 | 2292.92 | 652.48 | 0.00 | 10657.79 | 9.75 | 3.99 |
| NF13 | 212.35 | 0.00 | 1060.87 | 0.00 | 109.53 | 0.00 | 1382.75 | 9.42 | 0.52 |
| NF11 | 1075.23 | 0.00 | 1981.35 | 3443.52 | 210.01 | 0.00 | 6710.10 | 9.29 | 2.51 |
| FB1 | 9396.97 | 15118.66 | 11746.76 | 12617.40 | 1670.21 | 2362.59 | 52912.59 | 8.75 | 19.81 |
| NF9 | 402.09 | 0.00 | 1807.75 | 0.00 | 199.37 | 0.00 | 2409.22 | 8.72 | 0.90 |
| NF5 | 843.54 | 0.00 | 336.89 | 4056.35 | 49.65 | 0.00 | 5286.43 | 8.09 | 1.98 |
p_nlm <- ggplot() +
geom_sf(data = nlm_voronoi, aes(fill = kg_N_per_year_per_Ha), color = NA, alpha = 0.22) +
geom_sf(data = nlm, aes(fill = kg_N_per_year_per_Ha), color = "white", linewidth = 0.2) +
{if (!is.null(shoreline_ctx)) geom_sf(data = shoreline_ctx, fill = NA, color = "grey35", linewidth = 0.15, inherit.aes = FALSE)} +
geom_sf(data = pep_6347, fill = NA, color = "black", linewidth = 0.35) +
coord_sf(crs = st_crs(pep_6347), expand = FALSE) +
scale_fill_gradient(low = "#e5f5e0", high = "#006d2c", name = "kg N / yr / ha") +
labs(
title = "Figure 5. Peconic nitrogen yield by subwatershed",
subtitle = "Solid polygons are NLM subwatersheds; transparent fill extends nearest-subwatershed influence across the estuary frame",
x = NULL,
y = NULL,
caption = "The transparent bay-area partition is a nearest-polygon visual proxy only; it is not hydrodynamic transport."
) +
theme_minimal() +
theme(legend.position = "bottom")
print(p_nlm)
ggsave("figures/figure_05_nitrogen_yield_subwatersheds.png", p_nlm, width = 9, height = 5.5, dpi = 300)
stations_path <- "data/water_quality/scdhs_peconic/Station locations - updated 102824.csv"
stations_raw <- read_csv(
stations_path,
show_col_types = FALSE,
locale = locale(encoding = "Windows-1252")
) |>
clean_names()
stations_sf <- stations_raw |>
filter(!is.na(latitude), !is.na(longitude)) |>
st_as_sf(coords = c("longitude", "latitude"), crs = 4326, remove = FALSE) |>
st_transform(st_crs(pep_6347))
# Keep stations inside the Peconic boundary. This avoids carrying unrelated county stations forward.
peconic_stations <- stations_sf[st_intersects(stations_sf, pep_6347, sparse = FALSE)[, 1], ]
nrow(peconic_stations)
## [1] 334
# Bathymetric covariates come from the fixed raster products created above.
station_bathy <- terra::extract(
c(depth_final, slope, depth_class, slope_class, bathy_regime),
vect(peconic_stations)
) |>
as_tibble() |>
select(-ID)
peconic_station_covariates <- bind_cols(
peconic_stations,
station_bathy
) |>
left_join(depth_key, by = "depth_class") |>
left_join(slope_key |> select(slope_class, slope_label), by = "slope_class") |>
left_join(bathy_key |> select(bathy_regime, bathy_label), by = "bathy_regime")
# Nitrogen-loading polygons are terrestrial/subwatershed features.
# Marine station points are therefore linked to the nearest NLM polygon, not by containment.
nlm_for_join <- nlm |>
select(
subwatershed_code,
subwatershed,
atmospheric,
wwtf,
septic,
agriculture,
lawns,
parks_golf,
Total,
kg_N_per_year_per_Ha,
PercentofTotal
)
nearest_nlm_idx <- st_nearest_feature(peconic_station_covariates, nlm_for_join)
nearest_nlm_attrs <- nlm_for_join[nearest_nlm_idx, ] |> st_drop_geometry()
nearest_nlm_distance_m <- as.numeric(
st_distance(
peconic_station_covariates,
nlm_for_join[nearest_nlm_idx, ],
by_element = TRUE
)
)
station_nlm <- bind_cols(
peconic_station_covariates,
nearest_nlm_attrs,
nearest_nlm_distance_m = nearest_nlm_distance_m
)
station_nlm_table <- station_nlm |> st_drop_geometry()
write_csv(station_nlm_table, "outputs/tables/peconic_station_bathymetry_nearest_nitrogen_covariates.csv")
st_write(station_nlm, "outputs/peconic_station_bathymetry_nearest_nitrogen_covariates.gpkg", delete_dsn = TRUE, quiet = TRUE)
knitr::kable(
station_nlm_table |>
select(any_of(c(
"record_id", "record_name", "water_body_area", "water_body",
"depth_m", "slope_deg", "depth_label", "slope_label", "bathy_label",
"subwatershed_code", "kg_N_per_year_per_Ha", "nearest_nlm_distance_m",
"Total", "septic", "agriculture", "wwtf"
))) |>
head(12),
digits = 2
)
| record_id | record_name | water_body_area | water_body | depth_m | slope_deg | depth_label | slope_label | bathy_label | subwatershed_code | kg_N_per_year_per_Ha | nearest_nlm_distance_m | Total | septic | agriculture | wwtf |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| SITE-060069 | Peconic Estuary | Peconic Estuary | Great Peconic Bay | 2.48 | 0.97 | shallow_0_3m | high_slope | shallow_0_3m / high_slope | RI | 1.49 | 143.67 | 276.46 | 19.70 | 0.00 | 0.00 |
| SITE-060081 | Peconic Estuary | Peconic Estuary | Hog Creek | 1.78 | 0.92 | shallow_0_3m | high_slope | shallow_0_3m / high_slope | NF12 | 10.49 | 131.26 | 674.29 | 526.22 | 0.00 | 0.00 |
| SITE-060100 | Peconic Estuary | Peconic Estuary | Great Peconic Bay | 1.01 | 5.75 | shallow_0_3m | high_slope | shallow_0_3m / high_slope | NF14 | 10.80 | 531.41 | 10898.98 | 4345.45 | 4566.14 | 0.00 |
| SITE-060101 | Peconic Estuary | Peconic Estuary | East Creek | 1.97 | 1.62 | shallow_0_3m | high_slope | shallow_0_3m / high_slope | NF0 | 13.47 | 15.81 | 19631.57 | 5778.99 | 10199.53 | 0.00 |
| SITE-060102 | Peconic Estuary | Peconic Estuary | Cutchogue Harbor | 4.09 | 0.04 | mid_3_8m | low_slope | mid_3_8m / low_slope | NF13 | 9.42 | 332.81 | 1382.75 | 1060.87 | 0.00 | 0.00 |
| SITE-060103 | Peconic Estuary | Peconic Estuary | East Creek | 3.01 | 2.24 | mid_3_8m | high_slope | mid_3_8m / high_slope | NF14 | 10.80 | 27.23 | 10898.98 | 4345.45 | 4566.14 | 0.00 |
| SITE-060104 | Peconic Estuary | Peconic Estuary | North Sea Harbor | 3.74 | 4.20 | mid_3_8m | high_slope | mid_3_8m / high_slope | SF2 | 5.40 | 14.53 | 15510.50 | 9004.15 | 796.36 | 32.21 |
| SITE-060105 | Peconic Estuary | Peconic Estuary | NA | 4.40 | 0.79 | mid_3_8m | high_slope | mid_3_8m / high_slope | NF11 | 9.29 | 563.67 | 6710.10 | 1981.35 | 3443.52 | 0.00 |
| SITE-060106 | Peconic Estuary | Peconic Estuary | Goose Creek | 2.66 | 5.30 | shallow_0_3m | high_slope | shallow_0_3m / high_slope | NF9 | 8.72 | 19.51 | 2409.22 | 1807.75 | 0.00 | 0.00 |
| SITE-060107 | Peconic Estuary | Peconic Estuary | Town Creek | 3.14 | 1.94 | mid_3_8m | high_slope | mid_3_8m / high_slope | NF8 | 9.75 | 70.66 | 10657.79 | 6009.13 | 2292.92 | 0.00 |
| SITE-060108 | Peconic Estuary | Peconic Estuary | Southold Bay | 5.70 | 0.46 | mid_3_8m | moderate_slope | mid_3_8m / moderate_slope | NF9 | 8.72 | 645.00 | 2409.22 | 1807.75 | 0.00 | 0.00 |
| SITE-060109 | Peconic Estuary | Peconic Estuary | Hashamomuck Pond | 1.24 | 4.90 | shallow_0_3m | high_slope | shallow_0_3m / high_slope | NF8 | 9.75 | 0.00 | 10657.79 | 6009.13 | 2292.92 | 0.00 |
This map checks the station assignment step. The polygons show modeled nitrogen yield, while station colors show the local bathymetric depth class extracted from the DEM. The table written above preserves the nearest-subwatershed distance so this proxy can be filtered later if a station is too far from a modeled drainage polygon.
p_station_cov <- ggplot() +
geom_sf(data = nlm_voronoi, aes(fill = kg_N_per_year_per_Ha), color = NA, alpha = 0.18) +
geom_sf(data = nlm, aes(fill = kg_N_per_year_per_Ha), color = "white", linewidth = 0.15, alpha = 0.85) +
{if (!is.null(shoreline_ctx)) geom_sf(data = shoreline_ctx, fill = NA, color = "grey35", linewidth = 0.15, inherit.aes = FALSE)} +
geom_sf(data = pep_6347, fill = NA, color = "black", linewidth = 0.35) +
geom_sf(data = station_nlm, aes(color = depth_label), size = 1.6, alpha = 0.85) +
coord_sf(crs = st_crs(pep_6347), expand = FALSE) +
scale_fill_gradient(low = "#e5f5e0", high = "#006d2c", name = "kg N / yr / ha") +
scale_color_manual(
values = c(
"shallow_0_3m" = "#d9f0ff",
"mid_3_8m" = "#3182bd",
"deep_gt_8m" = "#08306b"
),
name = "Station depth class",
na.value = "grey30"
) +
labs(
title = "Figure 6. Station covariates: nitrogen yield and bathymetric class",
subtitle = "SCDHS stations are assigned DEM depth/slope classes and the nearest NLM subwatershed yield",
x = NULL,
y = NULL
) +
theme_minimal() +
theme(legend.position = "bottom")
print(p_station_cov)
ggsave("figures/figure_06_station_covariates_nitrogen_depth.png", p_station_cov, width = 9, height = 5.5, dpi = 300)
The station covariate table preserves the spatial assumptions used here: DEM-derived bathymetric class, nearest NLM subwatershed yield, and the distance from each station to that assigned subwatershed. Water-quality observations can then be summarized by station-year and joined to these fixed covariates for the 2014–2024 comparison window.
MUR GHRSST Level 4 SST is added as a coarse thermal-variability layer. The data were downloaded from the NOAA CoastWatch ERDDAP mirror as small Peconic-area NetCDF subsets, using every second day from May through August for each year in the working 2014–2025 window. I use the seasonal mean as a simple summer thermal-exposure variable and the May–August change in SST as a surface-temperature variability proxy. Large positive or negative changes should not be read as direct proof of upwelling or overturn, but they give a compact way to compare places where surface water warmed steadily against places where surface temperature changed more sharply across the same seasonal interval.
# NOAA CoastWatch ERDDAP MUR daily endpoint used for the local Peconic subset.
# The final workflow uses every second day from May through August.
BASE="https://coastwatch.noaa.gov/erddap/griddap/noaacwecnMURdaily.nc"
mkdir -p data/sst/mur_daily
for yr in {2014..2025}; do
start="${yr}-05-01"
end="${yr}-08-31"
d="$start"
while [ "$(date -d "$d" +%Y%m%d)" -le "$(date -d "$end" +%Y%m%d)" ]; do
ymd=$(date -d "$d" +%Y%m%d)
iso=$(date -d "$d" +%Y-%m-%d)
out="data/sst/mur_daily/mur_sst_peconic_${ymd}.nc"
curl -g -L --fail \
--connect-timeout 20 \
--max-time 120 \
--retry 3 \
--retry-delay 5 \
"${BASE}?analysed_sst[(${iso}T09:00:00Z):1:(${iso}T09:00:00Z)][(40.94759):1:(41.04557)][(-72.51269):1:(-72.24473)]" \
-o "$out"
d=$(date -I -d "$d + 2 days")
sleep 0.25
done
done
sst_files <- list.files(
"data/sst/mur_daily",
pattern = "mur_sst_peconic_[0-9]{8}\\.nc$",
full.names = TRUE
)
sst_index <- tibble(
file = sst_files,
date = ymd(str_extract(basename(sst_files), "[0-9]{8}")),
year = year(date),
month = month(date)
) |>
filter(year >= 2014, year <= 2025, month >= 5, month <= 8) |>
arrange(date)
read_sst_c <- function(file) {
r <- rast(file)
idx <- grep("analysed_sst|sst|Band", names(r), ignore.case = TRUE)[1]
if (is.na(idx)) idx <- 1
sst <- r[[idx]]
# MUR analysed_sst is usually stored in Kelvin. Convert only if needed.
sst_range <- global(sst, range, na.rm = TRUE)
if (as.numeric(sst_range[1, 2]) > 100) sst <- sst - 273.15
names(sst) <- "sst_c"
sst
}
make_sst_year <- function(year_value) {
files <- sst_index |>
filter(year == year_value) |>
pull(file)
if (length(files) < 2) return(NULL)
s <- rast(lapply(files, read_sst_c))
names(s) <- paste0("sst_", seq_len(nlyr(s)))
sst_mean <- app(s, mean, na.rm = TRUE)
sst_sd <- app(s, sd, na.rm = TRUE)
sst_delta <- s[[nlyr(s)]] - s[[1]]
names(sst_mean) <- paste0("sst_mean_may_aug_", year_value)
names(sst_sd) <- paste0("sst_sd_may_aug_", year_value)
names(sst_delta) <- paste0("sst_delta_may_aug_", year_value)
out <- c(sst_mean, sst_sd, sst_delta)
writeRaster(
out,
paste0("outputs/rasters/mur_sst_may_aug_metrics_", year_value, ".tif"),
overwrite = TRUE
)
out
}
sst_years <- sort(unique(sst_index$year))
sst_metrics_list <- lapply(sst_years, make_sst_year)
names(sst_metrics_list) <- sst_years
sst_metrics_list <- sst_metrics_list[!vapply(sst_metrics_list, is.null, logical(1))]
sst_stack <- if (length(sst_metrics_list) > 0) rast(sst_metrics_list) else NULL
if (!is.null(sst_stack)) {
writeRaster(
sst_stack,
"outputs/rasters/mur_sst_may_aug_metrics_2014_2025.tif",
overwrite = TRUE
)
}
sst_download_summary <- sst_index |>
count(year, name = "n_files")
knitr::kable(sst_download_summary)
| year | n_files |
|---|---|
| 2014 | 18 |
may_avg_sst <- tibble()
if (nrow(sst_index) > 0) {
may_avg_sst <- lapply(sort(unique(sst_index$year)), function(yr) {
may_files <- sst_index |>
filter(year == yr, month == 5) |>
pull(file)
if (length(may_files) == 0) return(NULL)
may_stack <- rast(lapply(may_files, read_sst_c))
tibble(
year = yr,
n_may_files = length(may_files),
mean_may_sst_c = as.numeric(global(app(may_stack, mean, na.rm = TRUE), mean, na.rm = TRUE)[1, 1])
)
}) |>
bind_rows()
write_csv(may_avg_sst, "outputs/tables/mur_may_average_sst.csv")
}
knitr::kable(may_avg_sst, digits = 2)
| year | n_may_files | mean_may_sst_c |
|---|---|---|
| 2014 | 18 | 9.63 |
raster_df <- function(r, value_name, size = 60000) {
df <- spatSample(r, size = size, method = "regular", as.df = TRUE, xy = TRUE, na.rm = TRUE) |>
as_tibble(.name_repair = "minimal")
df <- df[, 1:3]
names(df) <- c("x", "y", value_name)
df
}
if (!is.null(sst_stack)) {
display_year <- max(as.integer(names(sst_metrics_list)))
delta_name <- paste0("sst_delta_may_aug_", display_year)
delta_r <- sst_stack[[delta_name]]
# Bring the bathymetric slope grid into the SST grid so contours can be drawn on the same axes.
slope_sst_grid <- project(slope, delta_r, method = "bilinear")
delta_df <- raster_df(delta_r, "delta_sst_c", size = 60000)
slope_df_sst <- raster_df(slope_sst_grid, "slope_deg", size = 60000)
p_delta_slope <- ggplot() +
geom_raster(data = delta_df, aes(x = x, y = y, fill = delta_sst_c)) +
geom_contour(
data = slope_df_sst,
aes(x = x, y = y, z = slope_deg),
color = "grey20",
linewidth = 0.25,
bins = 5,
alpha = 0.65
) +
coord_equal() +
scale_fill_gradient2(
low = "#2166ac",
mid = "white",
high = "#b2182b",
midpoint = 0,
name = "May-Aug ΔSST (°C)"
) +
labs(
title = paste0("Figure 7. May--August SST change with bathymetric slope contours, ", display_year),
subtitle = "MUR SST change is shown as a surface-temperature variability proxy; grey contours show DEM-derived bathymetric slope resampled to the SST grid",
x = NULL,
y = NULL
) +
theme_minimal()
print(p_delta_slope)
ggsave(
"figures/figure_07_delta_sst_slope_overlay.png",
p_delta_slope,
width = 9,
height = 5.5,
dpi = 300
)
}
if (!is.null(sst_stack)) {
delta_layers <- grep("sst_delta_may_aug_", names(sst_stack), value = TRUE)
delta_mean <- app(sst_stack[[delta_layers]], mean, na.rm = TRUE)
names(delta_mean) <- "mean_delta_sst_c"
writeRaster(delta_mean, "outputs/rasters/mur_mean_delta_sst_may_aug_2014_2025.tif", overwrite = TRUE)
delta_mean_df <- raster_df(delta_mean, "mean_delta_sst_c", size = 60000)
p_delta_mean <- ggplot(delta_mean_df, aes(x = x, y = y, fill = mean_delta_sst_c)) +
geom_raster() +
coord_equal() +
scale_fill_gradient2(
low = "#2166ac",
mid = "white",
high = "#b2182b",
midpoint = 0,
name = "Mean ΔSST (°C)"
) +
labs(
title = "Figure 8. Mean May--August SST change, 2014--2025",
subtitle = "Daily MUR SST subsets summarized from the NOAA CoastWatch ERDDAP download stack",
x = NULL,
y = NULL
) +
theme_minimal()
print(p_delta_mean)
ggsave(
"figures/figure_08_mean_delta_sst_may_aug.png",
p_delta_mean,
width = 9,
height = 5.5,
dpi = 300
)
}
noaacwecnMURdaily). Used for May–August SST mean and
seasonal SST-change metrics downloaded as local Peconic-area NetCDF
subsets.