This R Markdown notebook supports the NEON EMERGE project by exploring relationships between:
Goal
This report is scoped to CUPE (Puerto Rico)
Workflow summary
Notes and limitations
site_id <- "CUPE"
start_ym <- "2020-01"
end_ym <- "2023-12"
tz_use <- "GMT"
Download NEON data
Nitrate (DP1.20033.001)
no3_product <- loadByProduct(
dpID = "DP1.20033.001",
site = site_id,
startdate = start_ym,
enddate = end_ym,
check.size = FALSE
)
# Use the 15-minute table when available
# (Table names can change between releases; adjust if needed.)
NO3_raw <- no3_product$NSW_15_minute
NO3 <- NO3_raw %>%
clean_names() %>%
mutate(
start_date_time = as.POSIXct(start_date_time, tz = tz_use),
site_id = as.character(site_id)
)
glimpse(NO3)
## Rows: 140,256
## Columns: 16
## $ domain_id <chr> "D04", "D04", "D04", "D04", "D04", "D04…
## $ site_id <chr> "CUPE", "CUPE", "CUPE", "CUPE", "CUPE",…
## $ horizontal_position <chr> "102", "102", "102", "102", "102", "102…
## $ vertical_position <chr> "100", "100", "100", "100", "100", "100…
## $ start_date_time <dttm> 2020-01-01 00:00:00, 2020-01-01 00:15:…
## $ end_date_time <dttm> 2020-01-01 00:15:00, 2020-01-01 00:30:…
## $ surf_water_nitrate_mean <dbl> 41.0, 40.6, 40.3, 39.9, 39.5, 39.1, 38.…
## $ surf_water_nitrate_minimum <dbl> 40.7, 40.5, 39.9, 39.6, 39.2, 38.8, 38.…
## $ surf_water_nitrate_maximum <dbl> 41.3, 41.0, 40.7, 40.4, 39.9, 39.5, 39.…
## $ surf_water_nitrate_variance <dbl> 0.04, 0.03, 0.05, 0.04, 0.03, 0.03, 0.0…
## $ surf_water_nitrate_num_pts <dbl> 10, 11, 11, 11, 11, 11, 11, 10, 11, 11,…
## $ surf_water_nitrate_exp_uncert <dbl> 2.73, 2.71, 2.68, 2.66, 2.63, 2.60, 2.5…
## $ surf_water_nitrate_std_er_mean <dbl> 0.06, 0.05, 0.07, 0.06, 0.05, 0.05, 0.0…
## $ final_qf <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ publication_date <chr> "20251209T012620Z", "20251209T012620Z",…
## $ release <chr> "RELEASE-2026", "RELEASE-2026", "RELEAS…
Water quality / turbidity (DP1.20288.001)
wq_product <- loadByProduct(
dpID = "DP1.20288.001",
site = site_id,
startdate = start_ym,
enddate = end_ym,
check.size = FALSE
)
wq_raw <- wq_product$waq_instantaneous
wq <- wq_raw %>%
clean_names() %>%
mutate(
start_date_time = as.POSIXct(start_date_time, tz = tz_use),
site_id = as.character(site_id)
)
glimpse(wq)
## Rows: 4,207,680
## Columns: 40
## $ domain_id <chr> "D04", "D04", "D04", "D04", "D04", "D0…
## $ site_id <chr> "CUPE", "CUPE", "CUPE", "CUPE", "CUPE"…
## $ horizontal_position <chr> "101", "101", "101", "101", "101", "10…
## $ vertical_position <chr> "100", "100", "100", "100", "100", "10…
## $ start_date_time <dttm> 2020-01-01 00:00:00, 2020-01-01 00:01…
## $ end_date_time <dttm> 2020-01-01 00:00:00, 2020-01-01 00:01…
## $ sensor_depth <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ sensor_depth_exp_uncert <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ sensor_depth_final_qf <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ specific_conductance <dbl> 226.40, 226.54, 226.80, 227.14, 227.12…
## $ specific_conductance_exp_uncert <dbl> 2.65, 2.65, 2.65, 2.66, 2.66, 2.66, 2.…
## $ specific_cond_final_qf <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ dissolved_oxygen <dbl> 8.6, 8.6, 8.6, 8.6, 8.6, 8.6, 8.6, 8.6…
## $ dissolved_oxygen_exp_uncert <dbl> 0.17, 0.17, 0.17, 0.17, 0.17, 0.17, 0.…
## $ dissolved_oxygen_final_qf <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ sea_level_dissolved_oxygen_sat <dbl> 100.45, 100.46, 100.45, 100.45, 100.46…
## $ sea_level_do_sat_exp_uncert <dbl> 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.…
## $ sea_level_do_sat_final_qf <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ local_dissolved_oxygen_sat <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ local_do_sat_exp_uncert <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ local_do_sat_final_qf <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ p_h <dbl> 8.13, 8.13, 8.13, 8.13, 8.12, 8.12, 8.…
## $ p_h_exp_uncert <dbl> 0.09, 0.09, 0.09, 0.09, 0.09, 0.09, 0.…
## $ p_h_final_qf <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ chlorophyll <dbl> 5.97, 5.81, 5.81, 5.88, 5.82, 5.91, 5.…
## $ chlorophyll_exp_uncert <dbl> 0.25, 0.24, 0.24, 0.24, 0.24, 0.25, 0.…
## $ chlorophyll_final_qf <int> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ chla_relative_fluorescence <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ chla_rel_fluoro_exp_uncert <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ chla_rel_fluoro_final_qf <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ turbidity <dbl> 6.15, 6.05, 5.98, 5.94, 5.94, 5.73, 5.…
## $ turbidity_exp_uncert <dbl> 0.10, 0.09, 0.09, 0.09, 0.09, 0.09, 0.…
## $ turbidity_final_qf <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ f_dom <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ raw_calibratedf_dom <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ f_dom_exp_uncert <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ f_dom_final_qf <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ buoy_na_flag <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ publication_date <chr> "20251209T031451Z", "20251209T031451Z"…
## $ release <chr> "RELEASE-2026", "RELEASE-2026", "RELEA…
NEON provides quality flags (QF). Here we retain records where: - NO₃: final_qf == 0 - Turbidity: turbidity_final_qf == 0
NO3_clean <- NO3 %>%
filter(final_qf == 0)
wq_clean <- wq %>%
filter(turbidity_final_qf == 0)
# Quick check
summary(NO3_clean$surf_water_nitrate_mean)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.00 23.20 24.70 25.16 26.60 79.40
summary(wq_clean$turbidity)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.330 0.550 4.959 0.850 3841.090
NO₃ (15-min) time series
ggplot(NO3_clean, aes(x = start_date_time, y = surf_water_nitrate_mean)) +
geom_point(alpha = 0.4, size = 0.6) +
theme_minimal() +
labs(
title = paste0(site_id, ": Surface water NO3 (15-min)"),
x = "Date",
y = "NO3 (µmol/L)"
)
Turbidity (instantaneous) time series
ggplot(wq_clean, aes(x = start_date_time, y = turbidity)) +
geom_point(alpha = 0.4, size = 0.6) +
theme_minimal() +
labs(
title = paste0(site_id, ": Turbidity (instantaneous)"),
x = "Date",
y = "Turbidity (NTU)"
)
To compare NO₃ and turbidity at consistent time steps, we compute: - Daily means: by date - Monthly means: by year-month
daily_mean <- function(df, datetime_col, value_col, group_cols = c("site_id")) {
df %>%
mutate(date = as.Date({{ datetime_col }})) %>%
group_by(across(all_of(group_cols)), date) %>%
summarize(value = mean({{ value_col }}, na.rm = TRUE), .groups = "drop")
}
monthly_mean <- function(df, datetime_col, value_col, group_cols = c("site_id")) {
df %>%
mutate(month = floor_date({{ datetime_col }}, unit = "month")) %>%
group_by(across(all_of(group_cols)), month) %>%
summarize(value = mean({{ value_col }}, na.rm = TRUE), .groups = "drop")
}
Compute daily/monthly series
# NO3
no3_daily <- daily_mean(NO3_clean, start_date_time, surf_water_nitrate_mean) %>%
rename(no3 = value)
no3_monthly <- monthly_mean(NO3_clean, start_date_time, surf_water_nitrate_mean) %>%
rename(no3 = value)
# Turbidity
turb_daily <- daily_mean(wq_clean, start_date_time, turbidity) %>%
rename(turbidity = value)
turb_monthly <- monthly_mean(wq_clean, start_date_time, turbidity) %>%
rename(turbidity = value)
Merge NO₃ + turbidity
merged_daily <- no3_daily %>%
inner_join(turb_daily, by = c("site_id", "date"))
merged_monthly <- no3_monthly %>%
inner_join(turb_monthly, by = c("site_id", "month"))
glimpse(merged_daily)
## Rows: 1,288
## Columns: 4
## $ site_id <chr> "CUPE", "CUPE", "CUPE", "CUPE", "CUPE", "CUPE", "CUPE", "CUP…
## $ date <date> 2020-01-01, 2020-01-02, 2020-01-03, 2020-01-04, 2020-01-05,…
## $ no3 <dbl> 31.87917, 27.87750, 27.85000, 27.04688, 27.14062, 26.91579, …
## $ turbidity <dbl> 2.1259103, 0.8213821, 0.2536417, 0.2758317, 0.2691556, 0.251…
glimpse(merged_monthly)
## Rows: 48
## Columns: 4
## $ site_id <chr> "CUPE", "CUPE", "CUPE", "CUPE", "CUPE", "CUPE", "CUPE", "CUP…
## $ month <dttm> 2020-01-01, 2020-02-01, 2020-03-01, 2020-04-01, 2020-05-01,…
## $ no3 <dbl> 25.98415, 24.22642, 25.86657, 24.90481, 23.70575, 25.75164, …
## $ turbidity <dbl> 5.0209368, 2.3902376, 4.0363770, 2.2843599, 0.4685160, 1.266…
Correlation matrices
# Daily correlation
corr_daily <- cor(merged_daily %>% select(no3, turbidity), use = "complete.obs")
corrplot(corr_daily, method = "circle", title = "Daily correlation (NO3 vs Turbidity)", mar = c(0,0,2,0))
# Monthly correlation
corr_monthly <- cor(merged_monthly %>% select(no3, turbidity), use = "complete.obs")
corrplot(corr_monthly, method = "circle", title = "Monthly correlation (NO3 vs Turbidity)", mar = c(0,0,2,0))
Scatter plots (with trend)
ggplot(merged_daily, aes(x = no3, y = turbidity)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm", se = TRUE) +
theme_minimal() +
labs(
title = paste0(site_id, ": Daily NO3 vs Turbidity"),
x = "Daily mean NO3 (µmol/L)",
y = "Daily mean Turbidity (NTU)"
)
ggplot(merged_monthly, aes(x = no3, y = turbidity)) +
geom_point(alpha = 0.7) +
geom_smooth(method = "lm", se = TRUE) +
theme_minimal() +
labs(
title = paste0(site_id, ": Monthly NO3 vs Turbidity"),
x = "Monthly mean NO3 (µmol/L)",
y = "Monthly mean Turbidity (NTU)"
)
Daily
daily_long <- merged_daily %>%
pivot_longer(cols = c(no3, turbidity), names_to = "parameter", values_to = "value")
ggplot(daily_long, aes(x = date, y = value, linetype = parameter)) +
geom_line(alpha = 0.9) +
theme_minimal() +
labs(
title = paste0(site_id, ": Daily means"),
x = "Date",
y = "Value",
linetype = "Parameter"
)
Monthly
monthly_long <- merged_monthly %>%
pivot_longer(cols = c(no3, turbidity), names_to = "parameter", values_to = "value")
ggplot(monthly_long, aes(x = month, y = value, linetype = parameter)) +
geom_line(alpha = 0.9) +
theme_minimal() +
labs(
title = paste0(site_id, ": Monthly means"),
x = "Month",
y = "Value",
linetype = "Parameter"
)
The analysis revealed a weak relationship between turbidity and nitrate (NO₃) concentrations. Peaks in turbidity did not consistently correspond with increases in nitrate, suggesting that sediment resuspension or runoff events are not the primary drivers of nitrate variability in this dataset. Overall, nitrate dynamics in the system are likely influenced by additional processes such as groundwater inputs, biological uptake, or watershed nutrient sources. Future work should explore additional environmental variables such as discharge, precipitation, and seasonal patterns to better understand the drivers of nitrate variability.