1 Project overview

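This R Markdown notebook supports the NEON EMERGE project by exploring relationships between surface-water nitrate (NO₃; NEON product DP1.20033.001) and turbidity (from the NEON water quality product, DP1.20288.001).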

Goal

This report is scoped to a single NEON site, CUPE (Río Cupeyes, Puerto Rico), for the period 2020-01 through 2023-12.

Workflow summary

  1. Download NEON data products via loadByProduct()
  2. Keep only records whose final quality flag marks them as usable (nitrate: final_qf == 0; turbidity: turbidity_final_qf == 0)
  3. Aggregate to daily and monthly means
  4. Explore relationships (correlation matrix + scatter plots)
  5. Visualize time series patterns

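Notes and limitations

- Quality filtering relies only on the final quality flags, so physically implausible values can still pass (the filtered nitrate summary in Section 3 has a minimum of −1.00 µmol/L).
- Turbidity is strongly right-skewed (in the Section 3 summary: median 0.55, mean 4.96, maximum 3841.09), so means, Pearson correlations, and linear trend lines are sensitive to a small number of extreme readings.
- Correlations use only the days/months for which both variables have data (inner join), and they describe association, not causation.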


2 Parameters

# Analysis parameters.
# site_id: NEON site code passed to loadByProduct() and used in plot titles.
site_id <- "CUPE"
# start_ym / end_ym: first and last month requested, as "YYYY-MM" strings.
start_ym <- "2020-01"
end_ym <- "2023-12"
# tz_use: time zone applied when timestamps are converted with as.POSIXct().
tz_use <- "GMT"

Download NEON data

Nitrate (DP1.20033.001)

# Download the nitrate product (DP1.20033.001) for the configured site and
# month range. check.size = FALSE is passed so the call does not stop to ask
# for confirmation of the download size.
no3_product <- loadByProduct(
  dpID = "DP1.20033.001",
  site = site_id,
  startdate = start_ym,
  enddate = end_ym,
  check.size = FALSE
)

# Use the 15-minute table. Table names can change between releases, so stop
# with a message that lists what was actually returned instead of letting a
# NULL table fail obscurely inside the pipeline below.
NO3_raw <- no3_product[["NSW_15_minute"]]
if (is.null(NO3_raw)) {
  stop(
    "Table 'NSW_15_minute' not found in the DP1.20033.001 download; ",
    "available tables: ", paste(names(no3_product), collapse = ", "),
    call. = FALSE
  )
}

# Standardize column names (clean_names) and make sure the timestamp and site
# columns have the types the later steps rely on.
NO3 <- NO3_raw %>%
  clean_names() %>%
  mutate(
    start_date_time = as.POSIXct(start_date_time, tz = tz_use),
    # Inside mutate(), `site_id` resolves to the data column, not to the
    # global parameter of the same name.
    site_id = as.character(site_id)
  )

glimpse(NO3)
## Rows: 140,256
## Columns: 16
## $ domain_id                      <chr> "D04", "D04", "D04", "D04", "D04", "D04…
## $ site_id                        <chr> "CUPE", "CUPE", "CUPE", "CUPE", "CUPE",…
## $ horizontal_position            <chr> "102", "102", "102", "102", "102", "102…
## $ vertical_position              <chr> "100", "100", "100", "100", "100", "100…
## $ start_date_time                <dttm> 2020-01-01 00:00:00, 2020-01-01 00:15:…
## $ end_date_time                  <dttm> 2020-01-01 00:15:00, 2020-01-01 00:30:…
## $ surf_water_nitrate_mean        <dbl> 41.0, 40.6, 40.3, 39.9, 39.5, 39.1, 38.…
## $ surf_water_nitrate_minimum     <dbl> 40.7, 40.5, 39.9, 39.6, 39.2, 38.8, 38.…
## $ surf_water_nitrate_maximum     <dbl> 41.3, 41.0, 40.7, 40.4, 39.9, 39.5, 39.…
## $ surf_water_nitrate_variance    <dbl> 0.04, 0.03, 0.05, 0.04, 0.03, 0.03, 0.0…
## $ surf_water_nitrate_num_pts     <dbl> 10, 11, 11, 11, 11, 11, 11, 10, 11, 11,…
## $ surf_water_nitrate_exp_uncert  <dbl> 2.73, 2.71, 2.68, 2.66, 2.63, 2.60, 2.5…
## $ surf_water_nitrate_std_er_mean <dbl> 0.06, 0.05, 0.07, 0.06, 0.05, 0.05, 0.0…
## $ final_qf                       <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ publication_date               <chr> "20251209T012620Z", "20251209T012620Z",…
## $ release                        <chr> "RELEASE-2026", "RELEASE-2026", "RELEAS…

Water quality / turbidity (DP1.20288.001)

# Download the water quality product (DP1.20288.001) for the configured site
# and month range. check.size = FALSE is passed so the call does not stop to
# ask for confirmation of the download size.
wq_product <- loadByProduct(
  dpID = "DP1.20288.001",
  site = site_id,
  startdate = start_ym,
  enddate = end_ym,
  check.size = FALSE
)

# Use the instantaneous table. Stop with a message that lists the returned
# tables if it is missing, rather than failing obscurely on a NULL below.
wq_raw <- wq_product[["waq_instantaneous"]]
if (is.null(wq_raw)) {
  stop(
    "Table 'waq_instantaneous' not found in the DP1.20288.001 download; ",
    "available tables: ", paste(names(wq_product), collapse = ", "),
    call. = FALSE
  )
}

# Standardize column names (clean_names) and make sure the timestamp and site
# columns have the types the later steps rely on.
wq <- wq_raw %>%
  clean_names() %>%
  mutate(
    start_date_time = as.POSIXct(start_date_time, tz = tz_use),
    # Inside mutate(), `site_id` resolves to the data column, not to the
    # global parameter of the same name.
    site_id = as.character(site_id)
  )

glimpse(wq)
## Rows: 4,207,680
## Columns: 40
## $ domain_id                       <chr> "D04", "D04", "D04", "D04", "D04", "D0…
## $ site_id                         <chr> "CUPE", "CUPE", "CUPE", "CUPE", "CUPE"…
## $ horizontal_position             <chr> "101", "101", "101", "101", "101", "10…
## $ vertical_position               <chr> "100", "100", "100", "100", "100", "10…
## $ start_date_time                 <dttm> 2020-01-01 00:00:00, 2020-01-01 00:01…
## $ end_date_time                   <dttm> 2020-01-01 00:00:00, 2020-01-01 00:01…
## $ sensor_depth                    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ sensor_depth_exp_uncert         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ sensor_depth_final_qf           <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ specific_conductance            <dbl> 226.40, 226.54, 226.80, 227.14, 227.12…
## $ specific_conductance_exp_uncert <dbl> 2.65, 2.65, 2.65, 2.66, 2.66, 2.66, 2.…
## $ specific_cond_final_qf          <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ dissolved_oxygen                <dbl> 8.6, 8.6, 8.6, 8.6, 8.6, 8.6, 8.6, 8.6…
## $ dissolved_oxygen_exp_uncert     <dbl> 0.17, 0.17, 0.17, 0.17, 0.17, 0.17, 0.…
## $ dissolved_oxygen_final_qf       <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ sea_level_dissolved_oxygen_sat  <dbl> 100.45, 100.46, 100.45, 100.45, 100.46…
## $ sea_level_do_sat_exp_uncert     <dbl> 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.…
## $ sea_level_do_sat_final_qf       <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ local_dissolved_oxygen_sat      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ local_do_sat_exp_uncert         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ local_do_sat_final_qf           <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ p_h                             <dbl> 8.13, 8.13, 8.13, 8.13, 8.12, 8.12, 8.…
## $ p_h_exp_uncert                  <dbl> 0.09, 0.09, 0.09, 0.09, 0.09, 0.09, 0.…
## $ p_h_final_qf                    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ chlorophyll                     <dbl> 5.97, 5.81, 5.81, 5.88, 5.82, 5.91, 5.…
## $ chlorophyll_exp_uncert          <dbl> 0.25, 0.24, 0.24, 0.24, 0.24, 0.25, 0.…
## $ chlorophyll_final_qf            <int> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ chla_relative_fluorescence      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ chla_rel_fluoro_exp_uncert      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ chla_rel_fluoro_final_qf        <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ turbidity                       <dbl> 6.15, 6.05, 5.98, 5.94, 5.94, 5.73, 5.…
## $ turbidity_exp_uncert            <dbl> 0.10, 0.09, 0.09, 0.09, 0.09, 0.09, 0.…
## $ turbidity_final_qf              <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ f_dom                           <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ raw_calibratedf_dom             <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ f_dom_exp_uncert                <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ f_dom_final_qf                  <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ buoy_na_flag                    <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ publication_date                <chr> "20251209T031451Z", "20251209T031451Z"…
## $ release                         <chr> "RELEASE-2026", "RELEASE-2026", "RELEA…

3 Data cleaning and quality filtering

NEON provides quality flags (QF) with each measurement. Here we retain only records where:

- NO₃: final_qf == 0
- Turbidity: turbidity_final_qf == 0

# Nitrate: keep records whose final quality flag is 0.
NO3_clean <- NO3 %>%
  filter(final_qf == 0)

# Turbidity: keep records whose turbidity quality flag is 0 AND that come from
# a sensor position where nitrate is also measured. The glimpses of the two
# tables show nitrate at horizontal_position "102" while the water-quality
# table begins with position "101", so without this restriction the turbidity
# means could pool a different sensor location than the nitrate series.
# NOTE(review): confirm against the NEON sensor layout for this site.
wq_clean <- wq %>%
  filter(
    turbidity_final_qf == 0,
    horizontal_position %in% unique(NO3_clean$horizontal_position)
  )

# An empty result means the two products share no sensor position (or no
# turbidity record passed QF); surface that instead of failing downstream.
if (nrow(wq_clean) == 0) {
  warning(
    "No QF-passing turbidity records at the nitrate sensor position(s): ",
    paste(unique(NO3_clean$horizontal_position), collapse = ", "),
    call. = FALSE
  )
}

# Quick check
summary(NO3_clean$surf_water_nitrate_mean)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   -1.00   23.20   24.70   25.16   26.60   79.40
summary(wq_clean$turbidity)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##    0.000    0.330    0.550    4.959    0.850 3841.090

4 Quick exploratory plots

NO₃ (15-min) time series

# Quality-filtered 15-minute nitrate through time, drawn as small
# semi-transparent points so dense stretches remain readable.
ggplot(
  data = NO3_clean,
  mapping = aes(x = start_date_time, y = surf_water_nitrate_mean)
) +
  geom_point(size = 0.6, alpha = 0.4) +
  labs(
    x = "Date",
    y = "NO3 (µmol/L)",
    title = paste0(site_id, ": Surface water NO3 (15-min)")
  ) +
  theme_minimal()

Turbidity (instantaneous) time series

# Quality-filtered instantaneous turbidity through time, drawn as small
# semi-transparent points.
ggplot(wq_clean, aes(x = start_date_time, y = turbidity)) +
  geom_point(alpha = 0.4, size = 0.6) +
  theme_minimal() +
  labs(
    title = paste0(site_id, ": Turbidity (instantaneous)"),
    x = "Date",
    # NEON's water quality product reports turbidity in Formazin
    # Nephelometric Units (FNU), not NTU.
    y = "Turbidity (FNU)"
  )


5 Daily and monthly aggregation

To compare NO₃ and turbidity at consistent time steps, we compute:

- Daily means: grouped by calendar date
- Monthly means: grouped by year-month

#' Daily mean of one column
#'
#' @param df Data frame containing the timestamp, value and grouping columns.
#' @param datetime_col Unquoted name of the timestamp column.
#' @param value_col Unquoted name of the numeric column to average.
#' @param group_cols Character vector of extra grouping column names.
#' @return One row per group and calendar day, with columns `group_cols`,
#'   `date` and `value` (the mean with NAs removed; NaN when a day has no
#'   non-missing values). The result is ungrouped.
daily_mean <- function(df, datetime_col, value_col, group_cols = c("site_id")) {
  df %>%
    # as_date() takes the calendar day in the timestamp's own time zone, the
    # same convention floor_date() uses in monthly_mean(). Base as.Date() on
    # a POSIXct defaults to tz = "UTC", which shifts records across midnight
    # whenever the column is stored in a non-UTC zone.
    mutate(date = lubridate::as_date({{ datetime_col }})) %>%
    group_by(across(all_of(group_cols)), date) %>%
    summarize(value = mean({{ value_col }}, na.rm = TRUE), .groups = "drop")
}

#' Monthly mean of one column
#'
#' @param df Data frame containing the timestamp, value and grouping columns.
#' @param datetime_col Unquoted name of the timestamp column.
#' @param value_col Unquoted name of the numeric column to average.
#' @param group_cols Character vector of extra grouping column names.
#' @return One row per group and month, with columns `group_cols`, `month`
#'   (the timestamp floored to the start of its month) and `value` (the mean
#'   with NAs removed). The result is ungrouped.
monthly_mean <- function(df, datetime_col, value_col, group_cols = c("site_id")) {
  # Tag every record with the first instant of its month.
  with_month <- mutate(
    df,
    month = floor_date({{ datetime_col }}, unit = "month")
  )
  # Group by the requested columns plus the month, then average.
  by_month <- group_by(with_month, across(all_of(group_cols)), month)
  summarize(
    by_month,
    value = mean({{ value_col }}, na.rm = TRUE),
    .groups = "drop"
  )
}

Compute daily/monthly series

# Nitrate: average the QF-filtered 15-minute means per day and per month,
# then give the generic `value` column its variable name.
no3_daily <- rename(
  daily_mean(NO3_clean, start_date_time, surf_water_nitrate_mean),
  no3 = value
)

no3_monthly <- rename(
  monthly_mean(NO3_clean, start_date_time, surf_water_nitrate_mean),
  no3 = value
)

# Turbidity: same aggregation on the QF-filtered instantaneous readings.
turb_daily <- rename(
  daily_mean(wq_clean, start_date_time, turbidity),
  turbidity = value
)

turb_monthly <- rename(
  monthly_mean(wq_clean, start_date_time, turbidity),
  turbidity = value
)

Merge NO₃ + turbidity

# Inner joins keep only the days / months present in both series.
merged_daily <- inner_join(
  no3_daily,
  turb_daily,
  by = c("site_id", "date")
)

merged_monthly <- inner_join(
  no3_monthly,
  turb_monthly,
  by = c("site_id", "month")
)

glimpse(merged_daily)
## Rows: 1,288
## Columns: 4
## $ site_id   <chr> "CUPE", "CUPE", "CUPE", "CUPE", "CUPE", "CUPE", "CUPE", "CUP…
## $ date      <date> 2020-01-01, 2020-01-02, 2020-01-03, 2020-01-04, 2020-01-05,…
## $ no3       <dbl> 31.87917, 27.87750, 27.85000, 27.04688, 27.14062, 26.91579, …
## $ turbidity <dbl> 2.1259103, 0.8213821, 0.2536417, 0.2758317, 0.2691556, 0.251…
glimpse(merged_monthly)
## Rows: 48
## Columns: 4
## $ site_id   <chr> "CUPE", "CUPE", "CUPE", "CUPE", "CUPE", "CUPE", "CUPE", "CUP…
## $ month     <dttm> 2020-01-01, 2020-02-01, 2020-03-01, 2020-04-01, 2020-05-01,…
## $ no3       <dbl> 25.98415, 24.22642, 25.86657, 24.90481, 23.70575, 25.75164, …
## $ turbidity <dbl> 5.0209368, 2.3902376, 4.0363770, 2.2843599, 0.4685160, 1.266…

6 Relationship assessment

Correlation matrices

# Daily: correlation matrix of the two series (cor() default method),
# computed on rows where both values are present.
corr_daily <- merged_daily %>%
  select(no3, turbidity) %>%
  cor(use = "complete.obs")
corrplot(
  corr_daily,
  method = "circle",
  title = "Daily correlation (NO3 vs Turbidity)",
  mar = c(0, 0, 2, 0)
)

# Monthly: same calculation on the monthly means.
corr_monthly <- merged_monthly %>%
  select(no3, turbidity) %>%
  cor(use = "complete.obs")
corrplot(
  corr_monthly,
  method = "circle",
  title = "Monthly correlation (NO3 vs Turbidity)",
  mar = c(0, 0, 2, 0)
)

Scatter plots (with trend)

# Daily means: turbidity against nitrate with a fitted linear trend and its
# confidence band. Turbidity is labelled in FNU (Formazin Nephelometric
# Units), the unit NEON's water quality product reports, rather than NTU.
ggplot(merged_daily, aes(x = no3, y = turbidity)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", se = TRUE) +
  theme_minimal() +
  labs(
    title = paste0(site_id, ": Daily NO3 vs Turbidity"),
    x = "Daily mean NO3 (µmol/L)",
    y = "Daily mean Turbidity (FNU)"
  )

# Monthly means: same plot at the monthly time step.
ggplot(merged_monthly, aes(x = no3, y = turbidity)) +
  geom_point(alpha = 0.7) +
  geom_smooth(method = "lm", se = TRUE) +
  theme_minimal() +
  labs(
    title = paste0(site_id, ": Monthly NO3 vs Turbidity"),
    x = "Monthly mean NO3 (µmol/L)",
    y = "Monthly mean Turbidity (FNU)"
  )

7 Time series comparison (daily and monthly)

Daily

# Reshape to long format: one row per date and parameter.
daily_long <- merged_daily %>%
  pivot_longer(cols = c(no3, turbidity), names_to = "parameter", values_to = "value")

# Nitrate and turbidity have different units and very different ranges, so
# each gets its own panel with an independent y axis and a unit-labelled
# strip instead of sharing one unlabeled "Value" axis.
ggplot(daily_long, aes(x = date, y = value, linetype = parameter)) +
  geom_line(alpha = 0.9) +
  facet_wrap(
    ~ parameter,
    ncol = 1,
    scales = "free_y",
    labeller = as_labeller(
      c(no3 = "NO3 (µmol/L)", turbidity = "Turbidity (FNU)")
    )
  ) +
  theme_minimal() +
  labs(
    title = paste0(site_id, ": Daily means"),
    x = "Date",
    y = "Value",
    linetype = "Parameter"
  )

Monthly

# Reshape to long format: one row per month and parameter.
monthly_long <- merged_monthly %>%
  pivot_longer(cols = c(no3, turbidity), names_to = "parameter", values_to = "value")

# Nitrate and turbidity have different units and very different ranges, so
# each gets its own panel with an independent y axis and a unit-labelled
# strip instead of sharing one unlabeled "Value" axis.
ggplot(monthly_long, aes(x = month, y = value, linetype = parameter)) +
  geom_line(alpha = 0.9) +
  facet_wrap(
    ~ parameter,
    ncol = 1,
    scales = "free_y",
    labeller = as_labeller(
      c(no3 = "NO3 (µmol/L)", turbidity = "Turbidity (FNU)")
    )
  ) +
  theme_minimal() +
  labs(
    title = paste0(site_id, ": Monthly means"),
    x = "Month",
    y = "Value",
    linetype = "Parameter"
  )


8 Preliminary Results

The analysis revealed only a weak relationship between turbidity and nitrate (NO₃) concentrations at both the daily and monthly time steps. Peaks in turbidity did not consistently correspond with increases in nitrate, which suggests that sediment resuspension or runoff events may not be the primary drivers of nitrate variability in this dataset. Nitrate dynamics in the system are therefore likely influenced by additional processes such as groundwater inputs, biological uptake, or watershed nutrient sources. Future work should explore additional environmental variables such as discharge, precipitation, and seasonal patterns to better understand the drivers of nitrate variability.