This R Markdown notebook supports the NEON EMERGE project by exploring relationships between:
This report is scoped to CUPE (Puerto Rico)
Workflow summary
Notes and limitations
site_id <- "CUPE"
start_ym <- "2020-01"
end_ym <- "2023-12"
tz_use <- "GMT"
Download NEON data
Nitrate (DP1.20033.001)
no3_product <- loadByProduct(
dpID = "DP1.20033.001",
site = site_id,
startdate = start_ym,
enddate = end_ym,
check.size = FALSE
)
# Use the 15-minute table when available
# (Table names can change between releases; adjust if needed.)
NO3_raw <- no3_product$NSW_15_minute
NO3 <- NO3_raw %>%
clean_names() %>%
mutate(
start_date_time = as.POSIXct(start_date_time, tz = tz_use),
site_id = as.character(site_id)
)
glimpse(NO3)
## Rows: 140,256
## Columns: 16
## $ domain_id <chr> "D04", "D04", "D04", "D04", "D04", "D04…
## $ site_id <chr> "CUPE", "CUPE", "CUPE", "CUPE", "CUPE",…
## $ horizontal_position <chr> "102", "102", "102", "102", "102", "102…
## $ vertical_position <chr> "100", "100", "100", "100", "100", "100…
## $ start_date_time <dttm> 2020-01-01 00:00:00, 2020-01-01 00:15:…
## $ end_date_time <dttm> 2020-01-01 00:15:00, 2020-01-01 00:30:…
## $ surf_water_nitrate_mean <dbl> 41.0, 40.6, 40.3, 39.9, 39.5, 39.1, 38.…
## $ surf_water_nitrate_minimum <dbl> 40.7, 40.5, 39.9, 39.6, 39.2, 38.8, 38.…
## $ surf_water_nitrate_maximum <dbl> 41.3, 41.0, 40.7, 40.4, 39.9, 39.5, 39.…
## $ surf_water_nitrate_variance <dbl> 0.04, 0.03, 0.05, 0.04, 0.03, 0.03, 0.0…
## $ surf_water_nitrate_num_pts <dbl> 10, 11, 11, 11, 11, 11, 11, 10, 11, 11,…
## $ surf_water_nitrate_exp_uncert <dbl> 2.73, 2.71, 2.68, 2.66, 2.63, 2.60, 2.5…
## $ surf_water_nitrate_std_er_mean <dbl> 0.06, 0.05, 0.07, 0.06, 0.05, 0.05, 0.0…
## $ final_qf <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ publication_date <chr> "20251209T012620Z", "20251209T012620Z",…
## $ release <chr> "RELEASE-2026", "RELEASE-2026", "RELEAS…
Water quality / turbidity (DP1.20288.001)
wq_product <- loadByProduct(
dpID = "DP1.20288.001",
site = site_id,
startdate = start_ym,
enddate = end_ym,
check.size = FALSE
)
wq_raw <- wq_product$waq_instantaneous
wq <- wq_raw %>%
clean_names() %>%
mutate(
start_date_time = as.POSIXct(start_date_time, tz = tz_use),
site_id = as.character(site_id)
)
glimpse(wq)
## Rows: 4,207,680
## Columns: 40
## $ domain_id <chr> "D04", "D04", "D04", "D04", "D04", "D0…
## $ site_id <chr> "CUPE", "CUPE", "CUPE", "CUPE", "CUPE"…
## $ horizontal_position <chr> "101", "101", "101", "101", "101", "10…
## $ vertical_position <chr> "100", "100", "100", "100", "100", "10…
## $ start_date_time <dttm> 2020-01-01 00:00:00, 2020-01-01 00:01…
## $ end_date_time <dttm> 2020-01-01 00:00:00, 2020-01-01 00:01…
## $ sensor_depth <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ sensor_depth_exp_uncert <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ sensor_depth_final_qf <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ specific_conductance <dbl> 226.40, 226.54, 226.80, 227.14, 227.12…
## $ specific_conductance_exp_uncert <dbl> 2.65, 2.65, 2.65, 2.66, 2.66, 2.66, 2.…
## $ specific_cond_final_qf <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ dissolved_oxygen <dbl> 8.6, 8.6, 8.6, 8.6, 8.6, 8.6, 8.6, 8.6…
## $ dissolved_oxygen_exp_uncert <dbl> 0.17, 0.17, 0.17, 0.17, 0.17, 0.17, 0.…
## $ dissolved_oxygen_final_qf <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ sea_level_dissolved_oxygen_sat <dbl> 100.45, 100.46, 100.45, 100.45, 100.46…
## $ sea_level_do_sat_exp_uncert <dbl> 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.…
## $ sea_level_do_sat_final_qf <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ local_dissolved_oxygen_sat <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ local_do_sat_exp_uncert <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ local_do_sat_final_qf <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ p_h <dbl> 8.13, 8.13, 8.13, 8.13, 8.12, 8.12, 8.…
## $ p_h_exp_uncert <dbl> 0.09, 0.09, 0.09, 0.09, 0.09, 0.09, 0.…
## $ p_h_final_qf <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ chlorophyll <dbl> 5.97, 5.81, 5.81, 5.88, 5.82, 5.91, 5.…
## $ chlorophyll_exp_uncert <dbl> 0.25, 0.24, 0.24, 0.24, 0.24, 0.25, 0.…
## $ chlorophyll_final_qf <int> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ chla_relative_fluorescence <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ chla_rel_fluoro_exp_uncert <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ chla_rel_fluoro_final_qf <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ turbidity <dbl> 6.15, 6.05, 5.98, 5.94, 5.94, 5.73, 5.…
## $ turbidity_exp_uncert <dbl> 0.10, 0.09, 0.09, 0.09, 0.09, 0.09, 0.…
## $ turbidity_final_qf <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ f_dom <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ raw_calibratedf_dom <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ f_dom_exp_uncert <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ f_dom_final_qf <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ buoy_na_flag <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ publication_date <chr> "20251209T031451Z", "20251209T031451Z"…
## $ release <chr> "RELEASE-2026", "RELEASE-2026", "RELEA…
NEON provides quality flags (QF). Here we retain records where: - NO3: final_qf == 0 - Turbidity: turbidity_final_qf == 0
NO3_clean <- NO3 %>%
filter(final_qf == 0)
wq_clean <- wq %>%
filter(turbidity_final_qf == 0)
# Quick check
summary(NO3_clean$surf_water_nitrate_mean)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.00 23.20 24.70 25.16 26.60 79.40
summary(wq_clean$turbidity)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.330 0.550 4.959 0.850 3841.090
NO3 (15-min) time series
ggplot(NO3_clean, aes(x = start_date_time, y = surf_water_nitrate_mean)) +
geom_point(alpha = 0.4, size = 0.6) +
theme_minimal() +
labs(
title = paste0(site_id, ": Surface water NO3 (15-min)"),
x = "Date",
y = "NO3 (µmol/L)"
)
Turbidity (instantaneous) time series
ggplot(wq_clean, aes(x = start_date_time, y = turbidity)) +
geom_point(alpha = 0.4, size = 0.6) +
theme_minimal() +
labs(
title = paste0(site_id, ": Turbidity (instantaneous)"),
x = "Date",
y = "Turbidity (NTU)"
)
To compare NO3 and turbidity at consistent time steps, we compute: - Daily means: by date - Monthly means: by year-month
daily_mean <- function(df, datetime_col, value_col, group_cols = c("site_id")) {
df %>%
mutate(date = as.Date({{ datetime_col }})) %>%
group_by(across(all_of(group_cols)), date) %>%
summarize(value = mean({{ value_col }}, na.rm = TRUE), .groups = "drop")
}
monthly_mean <- function(df, datetime_col, value_col, group_cols = c("site_id")) {
df %>%
mutate(month = floor_date({{ datetime_col }}, unit = "month")) %>%
group_by(across(all_of(group_cols)), month) %>%
summarize(value = mean({{ value_col }}, na.rm = TRUE), .groups = "drop")
}
Compute daily/monthly series
# NO3
no3_daily <- daily_mean(NO3_clean, start_date_time, surf_water_nitrate_mean) %>%
rename(no3 = value)
no3_monthly <- monthly_mean(NO3_clean, start_date_time, surf_water_nitrate_mean) %>%
rename(no3 = value)
# Turbidity
turb_daily <- daily_mean(wq_clean, start_date_time, turbidity) %>%
rename(turbidity = value)
turb_monthly <- monthly_mean(wq_clean, start_date_time, turbidity) %>%
rename(turbidity = value)
Merge NO3 + turbidity
merged_daily <- no3_daily %>%
inner_join(turb_daily, by = c("site_id", "date"))
merged_monthly <- no3_monthly %>%
inner_join(turb_monthly, by = c("site_id", "month"))
glimpse(merged_daily)
## Rows: 1,288
## Columns: 4
## $ site_id <chr> "CUPE", "CUPE", "CUPE", "CUPE", "CUPE", "CUPE", "CUPE", "CUP…
## $ date <date> 2020-01-01, 2020-01-02, 2020-01-03, 2020-01-04, 2020-01-05,…
## $ no3 <dbl> 31.87917, 27.87750, 27.85000, 27.04688, 27.14062, 26.91579, …
## $ turbidity <dbl> 2.1259103, 0.8213821, 0.2536417, 0.2758317, 0.2691556, 0.251…
glimpse(merged_monthly)
## Rows: 48
## Columns: 4
## $ site_id <chr> "CUPE", "CUPE", "CUPE", "CUPE", "CUPE", "CUPE", "CUPE", "CUP…
## $ month <dttm> 2020-01-01, 2020-02-01, 2020-03-01, 2020-04-01, 2020-05-01,…
## $ no3 <dbl> 25.98415, 24.22642, 25.86657, 24.90481, 23.70575, 25.75164, …
## $ turbidity <dbl> 5.0209368, 2.3902376, 4.0363770, 2.2843599, 0.4685160, 1.266…
Correlation matrices
# Daily correlation
corr_daily <- cor(merged_daily %>% select(no3, turbidity), use = "complete.obs")
corrplot(corr_daily, method = "circle", title = "Daily correlation (NO3 vs Turbidity)", mar = c(0,0,2,0))
# Monthly correlation
corr_monthly <- cor(merged_monthly %>% select(no3, turbidity), use = "complete.obs")
corrplot(corr_monthly, method = "circle", title = "Monthly correlation (NO3 vs Turbidity)", mar = c(0,0,2,0))
Scatter plots (with trend)
ggplot(merged_daily, aes(x = no3, y = turbidity)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm", se = TRUE) +
theme_minimal() +
labs(
title = paste0(site_id, ": Daily NO3 vs Turbidity"),
x = "Daily mean NO3 (µmol/L)",
y = "Daily mean Turbidity (NTU)"
)
ggplot(merged_monthly, aes(x = no3, y = turbidity)) +
geom_point(alpha = 0.7) +
geom_smooth(method = "lm", se = TRUE) +
theme_minimal() +
labs(
title = paste0(site_id, ": Monthly NO3 vs Turbidity"),
x = "Monthly mean NO3 (µmol/L)",
y = "Monthly mean Turbidity (NTU)"
)
Daily
daily_long <- merged_daily %>%
pivot_longer(cols = c(no3, turbidity), names_to = "parameter", values_to = "value")
ggplot(daily_long, aes(x = date, y = value, linetype = parameter)) +
geom_line(alpha = 0.9) +
theme_minimal() +
labs(
title = paste0(site_id, ": Daily means"),
x = "Date",
y = "Value",
linetype = "Parameter"
)
Monthly
monthly_long <- merged_monthly %>%
pivot_longer(cols = c(no3, turbidity), names_to = "parameter", values_to = "value")
ggplot(monthly_long, aes(x = month, y = value, linetype = parameter)) +
geom_line(alpha = 0.9) +
theme_minimal() +
labs(
title = paste0(site_id, ": Monthly means"),
x = "Month",
y = "Value",
linetype = "Parameter"
)