1 Project overview

This R Markdown notebook supports the NEON EMERGE project by exploring relationships between:

This report is scoped to CUPE (Puerto Rico)

Workflow summary

  1. Download NEON data products via loadByProduct()
  2. Filter to final quality flag values indicating usable data (finalQF == 0, turbidity QF == 0)
  3. Aggregate to daily and monthly means
  4. Explore relationships (correlation matrix + scatter plots)
  5. Visualize time series patterns

Notes and limitations


2 Parameters

site_id   <- "CUPE"
start_ym  <- "2020-01"
end_ym    <- "2023-12"
tz_use    <- "GMT"

Download NEON data

Nitrate (DP1.20033.001)

no3_product <- loadByProduct(
  dpID = "DP1.20033.001",
  site = site_id,
  startdate = start_ym,
  enddate = end_ym,
  check.size = FALSE
)
# Use the 15-minute table when available
# (Table names can change between releases; adjust if needed.)
NO3_raw <- no3_product$NSW_15_minute

NO3 <- NO3_raw %>%
  clean_names() %>%
  mutate(
    start_date_time = as.POSIXct(start_date_time, tz = tz_use),
    site_id = as.character(site_id)
  )

glimpse(NO3)
## Rows: 140,256
## Columns: 16
## $ domain_id                      <chr> "D04", "D04", "D04", "D04", "D04", "D04…
## $ site_id                        <chr> "CUPE", "CUPE", "CUPE", "CUPE", "CUPE",…
## $ horizontal_position            <chr> "102", "102", "102", "102", "102", "102…
## $ vertical_position              <chr> "100", "100", "100", "100", "100", "100…
## $ start_date_time                <dttm> 2020-01-01 00:00:00, 2020-01-01 00:15:…
## $ end_date_time                  <dttm> 2020-01-01 00:15:00, 2020-01-01 00:30:…
## $ surf_water_nitrate_mean        <dbl> 41.0, 40.6, 40.3, 39.9, 39.5, 39.1, 38.…
## $ surf_water_nitrate_minimum     <dbl> 40.7, 40.5, 39.9, 39.6, 39.2, 38.8, 38.…
## $ surf_water_nitrate_maximum     <dbl> 41.3, 41.0, 40.7, 40.4, 39.9, 39.5, 39.…
## $ surf_water_nitrate_variance    <dbl> 0.04, 0.03, 0.05, 0.04, 0.03, 0.03, 0.0…
## $ surf_water_nitrate_num_pts     <dbl> 10, 11, 11, 11, 11, 11, 11, 10, 11, 11,…
## $ surf_water_nitrate_exp_uncert  <dbl> 2.73, 2.71, 2.68, 2.66, 2.63, 2.60, 2.5…
## $ surf_water_nitrate_std_er_mean <dbl> 0.06, 0.05, 0.07, 0.06, 0.05, 0.05, 0.0…
## $ final_qf                       <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ publication_date               <chr> "20251209T012620Z", "20251209T012620Z",…
## $ release                        <chr> "RELEASE-2026", "RELEASE-2026", "RELEAS…

Water quality / turbidity (DP1.20288.001)

wq_product <- loadByProduct(
  dpID = "DP1.20288.001",
  site = site_id,
  startdate = start_ym,
  enddate = end_ym,
  check.size = FALSE
)
wq_raw <- wq_product$waq_instantaneous

wq <- wq_raw %>%
  clean_names() %>%
  mutate(
    start_date_time = as.POSIXct(start_date_time, tz = tz_use),
    site_id = as.character(site_id)
  )

glimpse(wq)
## Rows: 4,207,680
## Columns: 40
## $ domain_id                       <chr> "D04", "D04", "D04", "D04", "D04", "D0…
## $ site_id                         <chr> "CUPE", "CUPE", "CUPE", "CUPE", "CUPE"…
## $ horizontal_position             <chr> "101", "101", "101", "101", "101", "10…
## $ vertical_position               <chr> "100", "100", "100", "100", "100", "10…
## $ start_date_time                 <dttm> 2020-01-01 00:00:00, 2020-01-01 00:01…
## $ end_date_time                   <dttm> 2020-01-01 00:00:00, 2020-01-01 00:01…
## $ sensor_depth                    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ sensor_depth_exp_uncert         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ sensor_depth_final_qf           <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ specific_conductance            <dbl> 226.40, 226.54, 226.80, 227.14, 227.12…
## $ specific_conductance_exp_uncert <dbl> 2.65, 2.65, 2.65, 2.66, 2.66, 2.66, 2.…
## $ specific_cond_final_qf          <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ dissolved_oxygen                <dbl> 8.6, 8.6, 8.6, 8.6, 8.6, 8.6, 8.6, 8.6…
## $ dissolved_oxygen_exp_uncert     <dbl> 0.17, 0.17, 0.17, 0.17, 0.17, 0.17, 0.…
## $ dissolved_oxygen_final_qf       <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ sea_level_dissolved_oxygen_sat  <dbl> 100.45, 100.46, 100.45, 100.45, 100.46…
## $ sea_level_do_sat_exp_uncert     <dbl> 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.…
## $ sea_level_do_sat_final_qf       <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ local_dissolved_oxygen_sat      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ local_do_sat_exp_uncert         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ local_do_sat_final_qf           <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ p_h                             <dbl> 8.13, 8.13, 8.13, 8.13, 8.12, 8.12, 8.…
## $ p_h_exp_uncert                  <dbl> 0.09, 0.09, 0.09, 0.09, 0.09, 0.09, 0.…
## $ p_h_final_qf                    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ chlorophyll                     <dbl> 5.97, 5.81, 5.81, 5.88, 5.82, 5.91, 5.…
## $ chlorophyll_exp_uncert          <dbl> 0.25, 0.24, 0.24, 0.24, 0.24, 0.25, 0.…
## $ chlorophyll_final_qf            <int> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ chla_relative_fluorescence      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ chla_rel_fluoro_exp_uncert      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ chla_rel_fluoro_final_qf        <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ turbidity                       <dbl> 6.15, 6.05, 5.98, 5.94, 5.94, 5.73, 5.…
## $ turbidity_exp_uncert            <dbl> 0.10, 0.09, 0.09, 0.09, 0.09, 0.09, 0.…
## $ turbidity_final_qf              <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ f_dom                           <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ raw_calibratedf_dom             <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ f_dom_exp_uncert                <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ f_dom_final_qf                  <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ buoy_na_flag                    <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ publication_date                <chr> "20251209T031451Z", "20251209T031451Z"…
## $ release                         <chr> "RELEASE-2026", "RELEASE-2026", "RELEA…

3 Data cleaning and quality filtering

NEON provides quality flags (QF). Here we retain records where: - NO3: final_qf == 0 - Turbidity: turbidity_final_qf == 0

NO3_clean <- NO3 %>%
  filter(final_qf == 0)

wq_clean <- wq %>%
  filter(turbidity_final_qf == 0)

# Quick check
summary(NO3_clean$surf_water_nitrate_mean)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   -1.00   23.20   24.70   25.16   26.60   79.40
summary(wq_clean$turbidity)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##    0.000    0.330    0.550    4.959    0.850 3841.090

4 Quick exploratory plots

NO3 (15-min) time series

ggplot(NO3_clean, aes(x = start_date_time, y = surf_water_nitrate_mean)) +
  geom_point(alpha = 0.4, size = 0.6) +
  theme_minimal() +
  labs(
    title = paste0(site_id, ": Surface water NO3 (15-min)"),
    x = "Date",
    y = "NO3 (µmol/L)"
  )

Turbidity (instantaneous) time series

ggplot(wq_clean, aes(x = start_date_time, y = turbidity)) +
  geom_point(alpha = 0.4, size = 0.6) +
  theme_minimal() +
  labs(
    title = paste0(site_id, ": Turbidity (instantaneous)"),
    x = "Date",
    y = "Turbidity (NTU)"
  )


5 Daily and monthly aggregation

To compare NO3 and turbidity at consistent time steps, we compute: - Daily means: by date - Monthly means: by year-month

daily_mean <- function(df, datetime_col, value_col, group_cols = c("site_id")) {
  df %>%
    mutate(date = as.Date({{ datetime_col }})) %>%
    group_by(across(all_of(group_cols)), date) %>%
    summarize(value = mean({{ value_col }}, na.rm = TRUE), .groups = "drop")
}

monthly_mean <- function(df, datetime_col, value_col, group_cols = c("site_id")) {
  df %>%
    mutate(month = floor_date({{ datetime_col }}, unit = "month")) %>%
    group_by(across(all_of(group_cols)), month) %>%
    summarize(value = mean({{ value_col }}, na.rm = TRUE), .groups = "drop")
}

Compute daily/monthly series

# NO3
no3_daily <- daily_mean(NO3_clean, start_date_time, surf_water_nitrate_mean) %>%
  rename(no3 = value)

no3_monthly <- monthly_mean(NO3_clean, start_date_time, surf_water_nitrate_mean) %>%
  rename(no3 = value)

# Turbidity
turb_daily <- daily_mean(wq_clean, start_date_time, turbidity) %>%
  rename(turbidity = value)

turb_monthly <- monthly_mean(wq_clean, start_date_time, turbidity) %>%
  rename(turbidity = value)

Merge NO3 + turbidity

merged_daily <- no3_daily %>%
  inner_join(turb_daily, by = c("site_id", "date"))

merged_monthly <- no3_monthly %>%
  inner_join(turb_monthly, by = c("site_id", "month"))

glimpse(merged_daily)
## Rows: 1,288
## Columns: 4
## $ site_id   <chr> "CUPE", "CUPE", "CUPE", "CUPE", "CUPE", "CUPE", "CUPE", "CUP…
## $ date      <date> 2020-01-01, 2020-01-02, 2020-01-03, 2020-01-04, 2020-01-05,…
## $ no3       <dbl> 31.87917, 27.87750, 27.85000, 27.04688, 27.14062, 26.91579, …
## $ turbidity <dbl> 2.1259103, 0.8213821, 0.2536417, 0.2758317, 0.2691556, 0.251…
glimpse(merged_monthly)
## Rows: 48
## Columns: 4
## $ site_id   <chr> "CUPE", "CUPE", "CUPE", "CUPE", "CUPE", "CUPE", "CUPE", "CUP…
## $ month     <dttm> 2020-01-01, 2020-02-01, 2020-03-01, 2020-04-01, 2020-05-01,…
## $ no3       <dbl> 25.98415, 24.22642, 25.86657, 24.90481, 23.70575, 25.75164, …
## $ turbidity <dbl> 5.0209368, 2.3902376, 4.0363770, 2.2843599, 0.4685160, 1.266…

6 Relationship assessment

Correlation matrices

# Daily correlation
corr_daily <- cor(merged_daily %>% select(no3, turbidity), use = "complete.obs")
corrplot(corr_daily, method = "circle", title = "Daily correlation (NO3 vs Turbidity)", mar = c(0,0,2,0))

# Monthly correlation
corr_monthly <- cor(merged_monthly %>% select(no3, turbidity), use = "complete.obs")
corrplot(corr_monthly, method = "circle", title = "Monthly correlation (NO3 vs Turbidity)", mar = c(0,0,2,0))

Scatter plots (with trend)

ggplot(merged_daily, aes(x = no3, y = turbidity)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", se = TRUE) +
  theme_minimal() +
  labs(
    title = paste0(site_id, ": Daily NO3 vs Turbidity"),
    x = "Daily mean NO3 (µmol/L)",
    y = "Daily mean Turbidity (NTU)"
  )

ggplot(merged_monthly, aes(x = no3, y = turbidity)) +
  geom_point(alpha = 0.7) +
  geom_smooth(method = "lm", se = TRUE) +
  theme_minimal() +
  labs(
    title = paste0(site_id, ": Monthly NO3 vs Turbidity"),
    x = "Monthly mean NO3 (µmol/L)",
    y = "Monthly mean Turbidity (NTU)"
  )

7 Time series comparison (daily and monthly)

Daily

daily_long <- merged_daily %>%
  pivot_longer(cols = c(no3, turbidity), names_to = "parameter", values_to = "value")

ggplot(daily_long, aes(x = date, y = value, linetype = parameter)) +
  geom_line(alpha = 0.9) +
  theme_minimal() +
  labs(
    title = paste0(site_id, ": Daily means"),
    x = "Date",
    y = "Value",
    linetype = "Parameter"
  )

Monthly

monthly_long <- merged_monthly %>%
  pivot_longer(cols = c(no3, turbidity), names_to = "parameter", values_to = "value")

ggplot(monthly_long, aes(x = month, y = value, linetype = parameter)) +
  geom_line(alpha = 0.9) +
  theme_minimal() +
  labs(
    title = paste0(site_id, ": Monthly means"),
    x = "Month",
    y = "Value",
    linetype = "Parameter"
  )