1 Set up

1.1 Settings

project_dir  <- "/Users/heidi.k.hirsh/Documents/GitHub/Habitat_Sensitivity_FLK_v2"
slopes_dir   <- "Tables/Slopes"
slopes_file  <- NULL   # set like "slopes_per_visit_all_ndays_ALLHABS_20250916_0925.csv" or leave NULL for latest
cc_path_rel  <- "InputData/CCc_plusArea.csv"
# ndays_focus  <- 5
# zone_focus   <- "Inshore"
ndays_focus <- params$ndays_focus
zone_focus  <- params$zone_focus
zone_safe   <- gsub("[^A-Za-z0-9]+", "", zone_focus)
loess_span   <- 0.3
window_days <- 1
save_figs    <- TRUE

set.seed(42)
TS    <- function() format(Sys.time(), "%Y%m%d_%H%M")
stamp <- TS()

out_dir <- "Tables/AnnualRates"
fig_dir <- "Figures"
dir.create(file.path(fig_dir, "JdayPlots"), recursive = TRUE, showWarnings = FALSE)
dir.create(file.path(fig_dir, "AnnualRates"), recursive = TRUE, showWarnings = FALSE)
dir.create(file.path(fig_dir, "SeasonalRates"), recursive = TRUE, showWarnings = FALSE)
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

1.2 Load libraries

suppressPackageStartupMessages({
  library(tidyverse); library(readr); library(lubridate); library(scales); library(ggplot2); 
  library(ggbreak); library(purrr); library(tidyr); library(forcats); library(patchwork); library(dplyr)})

1.3 Order factors

# ---- factor orders ----
site_levels   <- c("1","2","3","EK_IN","EK_MID","EK_OFF","4","5","5.5","6","6.5","MR","UK_IN","UK_MID","UK_OFF","7","8","9","9.5","10","11","12","13","14","15","15.5","16","17","18","19","20","21","LK","21.5","WS","24","23","22","22.5")
season_levels <- c("Winter","Spring","Summer","Fall")
subreg_levels <- c("BB","UK","MK","LK")
zone_levels   <- c("Inshore","Mid channel","Offshore","Oceanic")

1.4 Plotting settings

# Title suffix used across all plots/files
title_suffix <- paste0("(ndays = ", ndays_focus, ", zone = ", zone_focus, ")")

# Convenience: wrap a base title with the suffix
title_ndz <- function(txt) paste(txt, title_suffix)


season_pal <- c(
  "Winter" = "#2C7BB6",   # blue
  "Spring" = "#1FA187",   # green
  "Summer" = "#FDB863",   # warm yellow/orange
  "Fall"   = "#D7191C"    # red
)

sub_region_pal <- c(
  "Biscayne Bay" = "#1b9e77",
  "Upper Keys"   = "#d95f02",
  "Middle Keys"  = "#7570b3",
  "Lower Keys"   = "#e7298a"
)

# SiteID often has many levels → generate on demand, but stable for an explicit set
# make_site_pal <- function(levels) {grDevices::setNames(grDevices::hcl.colors(length(levels), palette = "Dark3"), levels)}
SITE_PAL_MASTER <- setNames(hcl.colors(length(site_levels), palette = "Dark3"), site_levels)

1.5 Helper functions

## ---- helpers ----
# ---- x-axis alignment helpers ----

# Build one canonical x order across multiple data frames
# x_col: name of the x column (string). For sites, use "SiteID".
# canonical: optional vector to enforce a preferred global order (e.g., site_levels)
build_common_x_levels <- function(dfs, x_col = "SiteID", canonical = NULL) {
  # all unique values present across data frames
  vals <- Reduce(union, lapply(dfs, function(d) as.character(d[[x_col]])))
  # keep canonical order if provided; otherwise sorted unique
  if (!is.null(canonical)) {
    vals <- intersect(canonical, vals)
  } else {
    vals <- sort(vals)
  }
  vals
}

# Apply the same factor levels to one data frame
apply_x_levels <- function(df, x_col = "SiteID", levels_vec) {
  df[[x_col]] <- factor(df[[x_col]], levels = levels_vec)
  df
}

# Use the same discrete x-scale everywhere (limits + expansion)
scale_x_locked <- function(levels_vec, expand_add = 0.6, drop = FALSE) {
  ggplot2::scale_x_discrete(limits = levels_vec, drop = drop,
                            expand = ggplot2::expansion(add = expand_add))
}

# ----- Reusable scale helpers (keep your plotting code clean) -----

## season
scale_color_season <- function(...) {
  ggplot2::scale_color_manual(values = season_pal, limits = names(season_pal), ...)
}
scale_fill_season <- function(...) {
  ggplot2::scale_fill_manual(values = season_pal, limits = names(season_pal), ...)
}

## Subregion
scale_color_subregion <- function(...) {
  ggplot2::scale_color_manual(values = sub_region_pal, limits = names(sub_region_pal), ...)
}
scale_fill_subregion <- function(...) {
  ggplot2::scale_fill_manual(values = sub_region_pal, limits = names(sub_region_pal), ...)
}

## Site
# Helper to grab just the sites present in a given data frame, but keeping the global order from `site_levels`
present_sites <- function(data) {
  x <- as.character(droplevels(data$SiteID))
  present <- unique(stats::na.omit(x))
  intersect(site_levels, present)  # keep global order, drop unused
}

# ----- Scales that auto-subset the master map to the data -----
scale_color_site <- function(data, ...) {
  lvls <- present_sites(data)
  vals <- SITE_PAL_MASTER[lvls]
  scale_color_manual(values = vals, limits = lvls, ...)
}

scale_fill_site <- function(data, ...) {
  lvls <- present_sites(data)
  vals <- SITE_PAL_MASTER[lvls]
  scale_fill_manual(values = vals, limits = lvls, ...)
}


# Non-leap base so 366 isn't induced by the calendar conversion
season_from_jday <- function(j) {
  d <- ymd("2023-01-01") + days(j - 1)
  m <- month(d)
  case_when(
    m %in% c(12, 1, 2) ~ "Winter",
    m %in% 3:5         ~ "Spring",
    m %in% 6:8         ~ "Summer",
    TRUE               ~ "Fall"
  )
}

fill_edges <- function(x) {
  if (length(x) == 0 || all(is.na(x))) return(x)
  i1 <- which(!is.na(x))[1]; i2 <- tail(which(!is.na(x)), 1)
  if (!is.na(i1) && i1 > 1) x[1:(i1 - 1)] <- x[i1]
  if (!is.na(i2) && i2 < length(x)) x[(i2 + 1):length(x)] <- x[i2]; x
}

# LOESS per site, predict daily on per-site grid (365 or 366)
fit_loess_and_predict_daily <- function(df_site, span = 0.3, window_days = 1) {
  d <- df_site %>%
    tidyr::drop_na(jday, slope) %>%                       # drop any rows missing jday or slope
    dplyr::mutate(
      jday = as.integer(jday) |> pmin(366L) |> pmax(1L),  # clamp to [1, 366] #jday should always be an integer value between 1 and 366 (really 365 since we used a non-leap year)
      slope_per_day = slope / window_days                 # convert slopes into per-day values (this is in case we have to account for ndays later)
    )
  if (nrow(d) < 5) return(tibble(jday = integer(), pred_per_day = numeric())) # require at least 5 points to make a fit
  grid_days <-  1:365 #force 365 days
    # grid_days <- if (any(d$jday == 366L, na.rm = TRUE)) 1:366 else 1:365
  newd      <- tibble(jday = grid_days)
  mod <- loess(slope_per_day ~ jday, data = d, span = span, degree = 1,control = loess.control(surface = "direct")) #fit smoothed trend (Loess, slope v. jday)
  preds <- as.numeric(predict(mod, newdata = newd)) %>% fill_edges()  #predict a slope value for every day of the year
  newd %>% mutate(pred_per_day = preds)
}

2 Load Data

2.1 Slopes:

slopes_file <- file.path("/Users/heidi.k.hirsh/Documents/GitHub/Habitat_Sensitivity_FLK_v2/Tables/Slopes","slopes_per_visit_all_ndays_20250903_1548.csv")
message("Reading slopes from: ", slopes_file)
slopes_all <- read_csv(slopes_file, show_col_types = FALSE)
# names(slopes_all)


slopes_all <- slopes_all %>%
  mutate(
    visitID     = trimws(as.character(visitID)),
    ndays       = as.integer(ndays),
    # habitat_col = trimws(as.character(habitat_col)),
    Zone        = factor(trimws(as.character(Zone)), levels = zone_levels),
    Season      = factor(Season, levels = season_levels),
    Sub_region  = factor(Sub_region, levels = subreg_levels),
    SiteID      = factor(as.character(SiteID), levels = site_levels)
  )

2.2 Carbonate chemistry dataframe (including areas)

# ---- 2) join CCc and make focus subset ----
CCc <- read_csv(file.path(project_dir, cc_path_rel), show_col_types = FALSE) %>%
  mutate(
    visitID    = as.character(visitID),
    ndays      = as.integer(ndays),
    Sub_region = factor(
      Sub_region,
      levels = subreg_levels,
      labels = c("Biscayne Bay", "Upper Keys", "Middle Keys", "Lower Keys")
    ),
    Season     = factor(Season, levels = season_levels),
    SiteID     = factor(as.character(SiteID), levels = site_levels),
    Zone       = factor(as.character(Zone), levels = zone_levels)
  )

#join with slopes:
CCc_with_slopes <- CCc %>%
  left_join(slopes_all, by = c("visitID","ndays"))

# Fix any duplicate columns
CCc_with_slopes <- CCc_with_slopes %>%
  select(-ends_with(".y")) %>%
  rename_with(~ sub("\\.x$", "", .x), ends_with(".x"))

# names(CCc_with_slopes)

2.3 Focus subset for exploring sensitivity (change ndays and zone in the settings)

IN <- CCc_with_slopes %>%
  filter(ndays == ndays_focus, Zone == zone_focus) %>% 
  mutate(jdate = ymd("2024-01-01") + days(jday - 1))
## first version labels x axis with jday number (not month)
# ggplot(IN, aes(x = jday, y = slope, color = Sub_region)) +
#   geom_point(alpha = 0.5) +
#   geom_smooth(method = "loess", se = TRUE, span = 0.3) +
#   scale_color_subregion() +                         # NEW
#   labs(
#     # title = "Slope of ΔDIC vs Julian day (ndays = 5, Inshore)",
#     title = title_ndz("Slope of ΔDIC vs Julian day"),
#     x = "Julian day",
#     y = expression(paste("Slope (", Delta,"DIC ", mu,"mol kg"^{-1}, " km"^{-2}, ")"))
#   ) +
#   facet_wrap(~ SiteID, scales = "free_y") +
#   theme_bw()

B <- ggplot(IN, aes(x = jdate, y = slope, color = Sub_region, fill = Sub_region)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "loess", se = TRUE, span = 0.3, alpha = 0.2) +  # alpha controls shading
  scale_color_subregion() +
  scale_fill_subregion() +  # match fills to line colors
  scale_x_date(
    date_breaks = "1 month",
    labels = function(x) substr(as.character(lubridate::month(x, label = TRUE, abbr = TRUE)), 1, 1)
  ) +
  labs(
    title = title_ndz("Slope of ΔDIC vs Julian day"),
    x = "Month",
    y = expression(paste("Slope (", Delta,"DIC ", mu,"mol kg"^{-1}, " km"^{-2}, ")"))
  ) +
  facet_wrap(~ SiteID) +
  theme_bw()

B

C <- ggplot(IN, aes(x = jdate, y = slope, color = Sub_region, fill = Sub_region)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "loess", se = TRUE, span = 0.3, alpha = 0.2) +  # alpha controls shading
  scale_color_subregion() +
  scale_fill_subregion() +  # match fills to line colors
  scale_x_date(
    date_breaks = "1 month",
    labels = function(x) substr(as.character(lubridate::month(x, label = TRUE, abbr = TRUE)), 1, 1)
  ) +
  labs(
    title = title_ndz("Slope of ΔDIC vs Julian day"),
    x = "Month",
    y = expression(paste("Slope (", Delta,"DIC ", mu,"mol kg"^{-1}, " km"^{-2}, ")"))
  ) +
  # facet_wrap(~ SiteID) +
  facet_wrap(~ SiteID, scales = "free_y") +
  theme_bw()
C

## ultimately we should look at seasonality using jday level data (for now just do quarterly)


p_jday <- ggplot(IN, aes(x = jday, y = slope, color = SiteID, fill = SiteID)) +
  geom_hline(yintercept = 0, linewidth = 0.3, color = "black") +
  geom_point(alpha = 0.7) +
  geom_smooth(method = "loess", se = TRUE, span = 0.3, alpha = 0.2) +
  scale_color_site(IN) +
  scale_fill_site(IN) +  # ensure ribbon fill matches line colors
  scale_x_continuous(
    breaks = c(15,46,75,105,135,166,196,227,258,288,319,349),
    labels = month.abb
  ) +
  labs(
    x = "Month",
    y = expression(paste("Slope (", Delta, "DIC per km"^2, ")")),
    title = title_ndz("Slope vs Julian day")
  ) +
  facet_wrap(~ Sub_region, ncol = 1) +
  # coord_cartesian(ylim = c(-2, 1)) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
p_jday

3 Seasonal and Annual Rates — Quarterly Approach

This section uses seasonal medians (Winter, Spring, Summer, Fall) based on Season in CCc.

#Seasonal median delta 
season_days <- 365.25/4 #91.3125
SSm <- IN %>%
  group_by(Sub_region, SiteID, Season) %>%
  summarise(mddDIC = median(slope, na.rm = TRUE), .groups = "drop") %>%
  mutate(mddDIC91 = mddDIC * season_days)

# SSm <- IN %>%
#   mutate(Season = factor(season_from_jday(as.integer(jday)),
#                          levels = c("Winter","Spring","Summer","Fall"))) %>%
#   group_by(Sub_region, SiteID, Season) %>%
#   summarise(mddDIC = median(slope, na.rm = TRUE), .groups = "drop") %>%
#   mutate(mddDIC91 = mddDIC * (365.25/4))


#Annual totals per site
#Also calculate how much seagrass is needed to offset OA signal (+5.35 DIC/year)
nAnn <- SSm %>%
  group_by(Sub_region, SiteID) %>%
  summarise(
    nAnndDIC_km2 = sum(mddDIC91, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    nAnndDIC_m2  = nAnndDIC_km2 / 1e6,      # convert from km² to m²
    SGm2_offOA   = -5.35 / nAnndDIC_m2,     # money calc (Ana/Ian 2023)
    SG_HA_offOA  = SGm2_offOA / 1e4,        # per hectare (1 ha = 10,000 m²)
    nAnndDIC_ha  = nAnndDIC_km2 / 100       # annual ΔDIC per hectare
  )
  
#caution for divide by 0 scenarios
nAnn <- nAnn %>%
  mutate(
    SGm2_offOA  = if_else(is.infinite(SGm2_offOA), NA_real_, SGm2_offOA),
    SG_HA_offOA = if_else(is.infinite(SG_HA_offOA), NA_real_, SG_HA_offOA)
  )

#for some reason per ha values are huge
summary(nAnn$nAnndDIC_ha)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -2.037618 -1.094904 -0.756063 -0.777381 -0.204771 -0.008653
# table(nAnn$nAnndDIC_m2,nAnn$SG_HA_offOA)

# Scatter plot: required hectares vs annual ΔDIC per m²

x= ggplot(nAnn, aes(x = nAnndDIC_m2, y = SG_HA_offOA, color = Sub_region)) +
  geom_hline(yintercept = 0, linewidth = 0.3) +
  geom_vline(xintercept = 0, linewidth = 0.3, linetype = "dashed") +
  geom_point(size = 3, alpha = 0.7) +
  scale_color_subregion() +                         # NEW
  scale_y_continuous(labels = scales::comma, limits = c(0, NA)) +
  scale_x_continuous(labels = scales::scientific) +
  labs(
    title = "Diagnostic: Required hectares vs. annual ΔDIC per m²",
    x = expression(paste("Annual ", Delta, "DIC per m"^2)),
    y = "Required seagrass area (ha)"
  ) +
  theme_bw()
x

# z= ggplot(nAnn, aes(x = nAnndDIC_ha, y = SG_HA_offOA, color = Sub_region)) +
#   geom_hline(yintercept = 0, linewidth = 0.3) +
#   geom_vline(xintercept = 0, linewidth = 0.3, linetype = "dashed") +
#   geom_point(size = 3, alpha = 0.7) +
#   scale_color_subregion() +                         # NEW
#   scale_y_continuous(labels = scales::comma, limits = c(0, NA)) +
#   scale_x_continuous(labels = scales::scientific) +
#   labs(
#     title = "Diagnostic: Required hectares vs. annual ΔDIC per ha",
#     x = expression(paste("Annual ", Delta, "DIC per ha")),
#      # x = expression(paste("Annual ", Delta, "DIC per m"^2)),
#     y = "Required seagrass area (ha)"
#   ) +
#   theme_bw()
# z
loess_fits <- IN %>%
  group_by(Sub_region, SiteID) %>%
  group_split() %>%
  map(~ loess(slope ~ jday, data = ., span = 0.3))

# Name the list by SiteID for clarity
names(loess_fits) <- IN %>% distinct(SiteID) %>% pull()



# Save the whole list of models
out_name <- paste0("loess_models_ndays", ndays_focus, "_", zone_safe, "_", stamp, ".rds")
out_path <- file.path("Tables", "AnnualRates", out_name)

saveRDS(loess_fits, out_path)
message("Saved loess models to: ", out_path)
# --- Save outputs ------------------------------------------------------
write_csv(SSm, file.path(out_dir, paste0("seasonal_median_slopes_ndays", ndays_focus,"_", zone_safe, "_", stamp, ".csv")))
write_csv(nAnn,file.path(out_dir, paste0("annual_rates_ndays", ndays_focus,"_", zone_safe, "_", stamp, ".csv")))
saveRDS(list(seasonal = SSm, annual = nAnn), file.path(out_dir, paste0("annual_rates_ndays", ndays_focus,"_", zone_safe, "_", stamp, ".rds")))

message("Saved seasonal + annual tables for ndays = ", ndays_focus,", zone = ", zone_focus, " to: ", slopes_dir)

4 Plots for quarterly approach

# ======================= PLOT 1: Annual ΔDIC ============================
p1 <- ggplot(nAnn, aes(x = SiteID, y = nAnndDIC_ha, fill = Sub_region)) +
  geom_hline(yintercept = 0, linewidth = 0.3) +
  geom_col() +
  scale_fill_subregion() +                          # NEW
  labs(
    # title = "Annual ΔDIC per hectare seagrass (ndays = 5, Inshore)",
    title = title_ndz("Annual ΔDIC per hectare seagrass"),
    # x = "SiteID",
    x=' ',
    y = expression(paste("Annual ", Delta,"DIC (",mu,"mol kg"^{-1},") per ha")),
    fill = "Sub-region"
  ) +
  scale_y_continuous(labels = scales::label_number(big.mark = ",")) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
p1

p2 <- ggplot(nAnn, aes(x = SiteID, y = SG_HA_offOA, fill = Sub_region)) +
  geom_col() +
  scale_fill_subregion() +                      
  scale_y_break(c(40, 600)) +
  geom_text(
    aes(label = scales::comma(SG_HA_offOA, accuracy = 0.1)),
    vjust = 1.2, color = "white", size = 3
  ) +
    labs(
       title = title_ndz("Seagrass area (hectares) required to offset OA"),
       y="Seagrass Area (hectares)", x=' ') +
  theme_bw()
p2

# Convert seasonal totals to per-hectare
season_rates <- SSm %>%
  mutate(
    DIC_season_km2 = mddDIC91,       # seasonal ΔDIC per km²
    DIC_season_ha  = DIC_season_km2 / 100
  )

# keep a stable SiteID order if not already a factor
if (!is.factor(season_rates$SiteID)) {
  season_rates <- season_rates %>%
    mutate(
      SiteID = factor(SiteID, levels = sort(unique(SiteID))),
      Season = factor(Season, levels = c("Winter", "Spring", "Summer", "Fall")),
      Season = fct_relevel(Season, "Fall", "Winter", "Spring", "Summer")
    )
}

# Net annual per site (for labels)
annual_rates <- season_rates %>%
  group_by(Sub_region, SiteID) %>%
  summarise(annual_DIC_ha = sum(DIC_season_ha, na.rm = TRUE), .groups = "drop")

# Find a common label height (e.g., 5% above the tallest bar)
y_max <- max(annual_rates$annual_DIC_ha, na.rm = TRUE) * -20

p_stacked <- ggplot(season_rates,
                    aes(x = SiteID, y = DIC_season_ha, fill = Season)) +
  geom_hline(yintercept = 0, linewidth = 0.3) +
  geom_col(position = "stack") +
  scale_fill_season() +                             # NEW
  geom_text(data = annual_rates,
            aes(x = SiteID, label = round(annual_DIC_ha, 2)),
            y = y_max, vjust = 0, size = 3,
            inherit.aes = FALSE) +
  labs(
    # title = "Seasonal ΔDIC per hectare of seagrass (ndays = 5, Inshore)",
    title = title_ndz("Seasonal ΔDIC per hectare of seagrass"),
    x = "SiteID",
    y = expression(paste("ΔDIC (",mu,"mol kg"^{-1},") per ha")),
    fill = "Season"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
p_stacked

# Grouped bars (side-by-side seasons) per site (per hectare)
p_grouped <- ggplot(season_rates,
                    aes(x = SiteID, y = DIC_season_ha, fill = Season)) +
  geom_hline(yintercept = 0, linewidth = 0.3) +
  geom_col(position = position_dodge(width = 0.7), width = 0.6) +
  scale_fill_season() +                             # NEW
  labs(
    title =title_ndz("Seasonal ΔDIC per hectare by site"),
    x = "SiteID",
    y = expression(paste("Seasonal ", Delta,"DIC (",mu,"mol kg"^{-1},") per ha")),
    fill = "Season"
  ) +
  scale_y_continuous(labels = scales::label_number(big.mark = ",")) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
p_grouped

5 Seasonal and Annual Rates — Jday (Enhanced Seasonal Resolution)

This section uses LOESS fits by Julian day to generate daily (or weekly) resolution contributions, then sums to seasons or years.

season_calendar <- IN %>%
  transmute(jday = as.integer(jday),
            Season = factor(Season, levels = season_levels)) %>%
  count(jday, Season, sort = TRUE) %>%
  group_by(jday) %>%
  slice_max(n, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  # ensure all 1:365 exist; fill any missing with month→season mapping
  right_join(tibble(jday = 1:365), by = "jday") %>%
  mutate(
    Season = if_else(
      is.na(Season),
      factor(season_from_jday(jday), levels = season_levels),
      Season,
      missing = factor(NA, levels = season_levels)  # ensures type consistency
    )
  )

summary(season_calendar)
##       jday        Season         n        
##  Min.   :  1   Winter:90   Min.   :1.000  
##  1st Qu.: 92   Spring:91   1st Qu.:3.000  
##  Median :183   Summer:91   Median :5.000  
##  Mean   :183   Fall  :93   Mean   :4.468  
##  3rd Qu.:274               3rd Qu.:6.000  
##  Max.   :365               Max.   :9.000  
##                            NA's   :303
# --- fit one LOESS per site across the year; DO NOT carry Season here ---
daily_preds <- IN %>%
  transmute(Sub_region, SiteID,
            jday  = pmin(pmax(as.integer(jday), 1L), 365L),
            slope) %>%                                # <- no Season column
  group_by(Sub_region, SiteID) %>%
  group_modify(~ fit_loess_and_predict_daily(.x, span = loess_span,
                                             window_days = window_days)) %>%
  ungroup() %>%
  # attach the single Season column from the calendar (no duplicates)
  left_join(season_calendar, by = "jday") %>%
  mutate(
    contrib_ha = pred_per_day / 100,
    Season     = factor(Season, levels = season_levels)
  )

# --- seasonal totals using that single Season column ---
seasonal_jday <- daily_preds %>%
  group_by(Sub_region, SiteID, Season) %>%
  summarise(DIC_season_ha = sum(contrib_ha, na.rm = TRUE), .groups = "drop")

annual_jday <- seasonal_jday %>%
  group_by(Sub_region, SiteID) %>%
  summarise(annual_DIC_ha = sum(DIC_season_ha, na.rm = TRUE), .groups = "drop")
# --- Hectares required to offset OA (Jday/LOESS version) ---


# Convert per ha → per m2 and compute area to offset +5.35 µmol/kg/yr
#    (Positive required area when annual_DIC_m2 is negative—i.e., drawdown.)
nAnn_jday <- annual_jday %>%
  mutate(
    annual_DIC_m2 = annual_DIC_ha / 1e4,
    SGm2_offOA    = -5.35 /  annual_DIC_m2,
    SG_HA_offOA   = SGm2_offOA / 1e4              # hectares required
  ) %>%
  mutate(                                       # clean divide-by-zero/infs
    SGm2_offOA  = if_else(is.finite(SGm2_offOA),  SGm2_offOA,  NA_real_),
    SG_HA_offOA = if_else(is.finite(SG_HA_offOA), SG_HA_offOA, NA_real_)
  ) %>%
  mutate(                                       # optional: stable x order
    SiteID = fct_inorder(as.factor(SiteID))
  )


# ---- Align x for j0 (annual_jday) and j1 (nAnn_jday) ----
COMMON_X_J <- build_common_x_levels(
  list(annual_jday, nAnn_jday),
  x_col = "SiteID",
  canonical = site_levels
)
annual_jday <- apply_x_levels(annual_jday, "SiteID", COMMON_X_J)
nAnn_jday   <- apply_x_levels(nAnn_jday,   "SiteID", COMMON_X_J)

# (Optional) Diagnostic scatter: required ha vs annual ΔDIC per m²
j2 <- ggplot(nAnn_jday, aes(x = annual_DIC_m2, y = SG_HA_offOA, color = Sub_region)) +
  geom_hline(yintercept = 0, linewidth = 0.3) +
  geom_vline(xintercept = 0, linewidth = 0.3, linetype = "dashed") +
  geom_point(size = 3, alpha = 0.8, na.rm = TRUE) +
  scale_color_subregion() +                          # NEW
  scale_x_continuous(labels = scales::scientific) +
  scale_y_continuous(labels = scales::comma) +
  labs(
    title = "Diagnostic (Jday): Required hectares vs. annual ΔDIC per m²",
    x = expression(paste("Annual ", Delta, "DIC per m"^2)),
    y = "Required seagrass area (ha)"
  ) +
  theme_bw()
j2

j0 <- ggplot(annual_jday, aes(SiteID, annual_DIC_ha, fill = Sub_region)) +
  geom_hline(yintercept = 0, linewidth = 0.3) +
  geom_col(width = 0.7) +                            # SAME width in both
  scale_fill_subregion() +
  scale_x_locked(COMMON_X_J, expand_add = 0.6, drop = FALSE) +  # lock x
  labs(title = title_ndz("Annual (jday) ΔDIC per ha"),
       x = "SiteID",
       y = expression(paste(Delta, "DIC (", mu, "mol kg"^{-1}, ") per ha"))) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
j0 

j1 <- ggplot(nAnn_jday, aes(SiteID, SG_HA_offOA, fill = Sub_region)) +
  geom_col(width = 0.7) +                            # SAME width in both
  scale_fill_subregion() +
  scale_x_locked(COMMON_X_J, expand_add = 0.6, drop = FALSE) +  # lock x
  scale_y_break(c(40, 190)) +
  geom_text(aes(label = scales::comma(SG_HA_offOA, accuracy = 0.1)),
            vjust = 1.2, color = "white", size = 3) +
  labs(title = title_ndz("Seagrass area (hectares) required to offset OA"),
       x = "SiteID", y = "Seagrass Area (hectares)") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
j1

# --- Quarterly (from your SSm/nAnn workflow) ---
quarterly_season <- SSm %>%
  mutate(DIC_season_ha = mddDIC91 / 100)

annual_quarterly <- quarterly_season %>%
  group_by(SiteID) %>%
  summarise(annual_ha = sum(DIC_season_ha, na.rm = TRUE), .groups = "drop")

# site_lvls_plot <- intersect(site_levels, unique(as.character(quarterly_season$SiteID)))
# quarterly_season <- quarterly_season %>% mutate(SiteID = factor(SiteID, levels = site_lvls_plot))
# seasonal_jday    <- seasonal_jday    %>% mutate(SiteID = factor(SiteID, levels = site_lvls_plot))

# ---- Align x for p_quarterly and p_jday ----
COMMON_X_QJ <- build_common_x_levels(
  list(quarterly_season, seasonal_jday),
  x_col = "SiteID",
  canonical = site_levels
)

quarterly_season <- apply_x_levels(quarterly_season, "SiteID", COMMON_X_QJ)
seasonal_jday    <- apply_x_levels(seasonal_jday,    "SiteID", COMMON_X_QJ)


p_quarterly <- ggplot(quarterly_season,
                      aes(SiteID, DIC_season_ha, fill = Season)) +
  geom_col(position = "stack") +
  scale_fill_season() +
  scale_x_locked(COMMON_X_QJ) + 
  labs(title = "Quarterly method (Season × 91.3 days)",
       y = expression(paste(Delta, "DIC per ha"))) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# theme(axis.text.x = element_blank(),
  #       axis.ticks.x = element_blank(),
  #       axis.title.x = element_blank())
# p_quarterly 

# --- Jday LOESS (from daily_preds) ---
p_jday <- ggplot(seasonal_jday,
                 aes(x = SiteID, y = DIC_season_ha, fill = Season)) +
  geom_col(position = "stack") +
  scale_fill_season() +                               # NEW
  labs(title = "Jday method (daily to seasonal sum)",
       y = expression(paste(Delta, "DIC per ha"))) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# p_jday

# --- Combine with patchwork ---
p_quarterly | p_jday

# Compute per-season per-ha for quarterly
quarterly_season <- SSm %>%
  mutate(DIC_season_ha = mddDIC91 / 100)

# Compute per-season per-ha for jday (you already have seasonal_jday)
# seasonal_jday: cols SiteID, Season, DIC_season_ha

# Make sure Season is a factor in the same order for both
quarterly_season <- quarterly_season %>%
  mutate(Season = factor(Season, levels = c("Winter","Spring","Summer","Fall")))
seasonal_jday <- seasonal_jday %>%
  mutate(Season = factor(Season, levels = c("Winter","Spring","Summer","Fall")))

# Common y-limit across both plots (optional but helpful for visual comparison)
y_max <- max(
  quarterly_season$DIC_season_ha,
  seasonal_jday$DIC_season_ha,
  na.rm = TRUE
) * 1.05
y_lim <- c(0, y_max)

# Grouped (side-by-side) bars 
# 
# p_quarterly <- ggplot(quarterly_season,
#                       aes(x = SiteID, y = DIC_season_ha, fill = Season)) +
#   geom_col(position = "dodge") +
#   labs(title = "Quarterly method (Season × 91.3 days)",
#        y = expression(paste(Delta, "DIC per ha"))) +
#   theme_bw() +
#   theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
#   scale_y_continuous(expand = expansion(mult = c(0.05, 0.05)))
# 
# p_jday <- ggplot(seasonal_jday,
#                       aes(x = SiteID, y = DIC_season_ha, fill = Season)) +
#   geom_col(position = "dodge") +
#   labs(title = "Jday LOESS method (daily to season sum)",
#        y = expression(paste(Delta, "DIC per ha"))) +
#   theme_bw() +
#   theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
#   scale_y_continuous(expand = expansion(mult = c(0.05, 0.05)))
# 
# 
# # Side-by-side comparison
# (p_quarterly | p_jday) + patchwork::plot_layout(guides = "collect") &
#   theme(legend.position = "top")


# Compute global y-limits across both methods
y_range <- range(
  c(quarterly_season$DIC_season_ha, seasonal_jday$DIC_season_ha),
  na.rm = TRUE
)

# Quarterly plot
p_quarterly <- ggplot(quarterly_season,
                      aes(x = SiteID, y = DIC_season_ha, fill = Season)) +
  geom_col(position = "dodge") +
  scale_fill_season()+
  geom_hline(yintercept = 0, linewidth = 0.3, color = "black") +
  labs(title = "Quarterly method (Season × 91.3 days)",
       y = expression(paste(Delta, "DIC per ha"))) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(limits = y_range,
                     expand = expansion(mult = c(0.05, 0.05)))

# Jday plot
p_jday <- ggplot(seasonal_jday,
                 aes(x = SiteID, y = DIC_season_ha, fill = Season)) +
  geom_col(position = "dodge") +
  scale_fill_season()+
  geom_hline(yintercept = 0, linewidth = 0.3, color = "black") +
  labs(title = "Jday method (daily to season sum)",
       y = expression(paste(Delta, "DIC per ha"))) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(limits = y_range,
                     expand = expansion(mult = c(0.05, 0.05)))

# Side-by-side comparison with shared legend
two =(p_quarterly | p_jday) + patchwork::plot_layout(guides = "collect") &
  theme(legend.position = "top")
two

## what about hectares needed comparison? 
# ---- session info ----
sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] patchwork_1.2.0 ggbreak_0.1.6   scales_1.3.0    lubridate_1.9.3
##  [5] forcats_1.0.0   stringr_1.5.1   dplyr_1.1.4     purrr_1.0.2    
##  [9] readr_2.1.5     tidyr_1.3.1     tibble_3.2.1    ggplot2_3.5.1  
## [13] tidyverse_2.0.0
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.2.1   xfun_0.52          bslib_0.8.0        lattice_0.22-6    
##  [5] splines_4.2.1      ggfun_0.2.0        colorspace_2.1-1   vctrs_0.6.5       
##  [9] generics_0.1.3     htmltools_0.5.8.1  mgcv_1.8-40        yaml_2.3.10       
## [13] utf8_1.2.4         gridGraphics_0.5-1 rlang_1.1.4        jquerylib_0.1.4   
## [17] pillar_1.9.0       glue_1.8.0         withr_3.0.1        rappdirs_0.3.3    
## [21] bit64_4.0.5        lifecycle_1.0.4    munsell_0.5.1      gtable_0.3.5      
## [25] evaluate_0.24.0    labeling_0.4.3     knitr_1.50         tzdb_0.4.0        
## [29] fastmap_1.2.0      parallel_4.2.1     fansi_1.0.6        cachem_1.1.0      
## [33] vroom_1.6.5        jsonlite_1.8.8     farver_2.1.2       bit_4.0.5         
## [37] fs_1.6.4           hms_1.1.3          digest_0.6.37      aplot_0.2.8       
## [41] stringi_1.8.4      grid_4.2.1         cli_3.6.3          tools_4.2.1       
## [45] yulab.utils_0.2.1  magrittr_2.0.3     sass_0.4.9         crayon_1.5.3      
## [49] pkgconfig_2.0.3    Matrix_1.6-4       ggplotify_0.1.2    timechange_0.3.0  
## [53] rmarkdown_2.29     rstudioapi_0.16.0  R6_2.5.1           nlme_3.1-157      
## [57] compiler_4.2.1