1 Set up

1.1 Settings

project_dir  <- "/Users/heidi.k.hirsh/Documents/GitHub/Habitat_Sensitivity_FLK_v2"
slopes_dir   <- "Tables/Slopes"
slopes_file  <- NULL   # set like "slopes_per_visit_all_ndays_ALLHABS_20250916_0925.csv" or leave NULL for latest
cc_path_rel  <- "InputData/CCc_plusArea.csv"
# ndays_focus  <- 5
# zone_focus   <- "Inshore"
ndays_focus <- params$ndays_focus
zone_focus  <- params$zone_focus
zone_safe   <- gsub("[^A-Za-z0-9]+", "", zone_focus)
loess_span   <- 0.3
window_days <- 1
save_figs    <- TRUE

set.seed(42)
TS    <- function() format(Sys.time(), "%Y%m%d_%H%M")
stamp <- TS()

out_dir <- "Tables/AnnualRates"
fig_dir <- "Figures"
dir.create(file.path(fig_dir, "JdayPlots"), recursive = TRUE, showWarnings = FALSE)
dir.create(file.path(fig_dir, "AnnualRates"), recursive = TRUE, showWarnings = FALSE)
dir.create(file.path(fig_dir, "SeasonalRates"), recursive = TRUE, showWarnings = FALSE)
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

1.2 Load libraries

suppressPackageStartupMessages({
  library(tidyverse); library(readr); library(lubridate); library(scales); library(ggplot2); 
  library(ggbreak); library(purrr); library(tidyr); library(forcats); library(patchwork); library(dplyr)})

1.3 Order factors

# ---- factor orders ----
site_levels   <- c("1","2","3","EK_IN","EK_MID","EK_OFF","4","5","5.5","6","6.5","MR","UK_IN","UK_MID","UK_OFF","7","8","9","9.5","10","11","12","13","14","15","15.5","16","17","18","19","20","21","LK","21.5","WS","24","23","22","22.5")
season_levels <- c("Winter","Spring","Summer","Fall")
subreg_levels <- c("BB","UK","MK","LK")
zone_levels   <- c("Inshore","Mid channel","Offshore","Oceanic")

1.4 Plotting settings

# Title suffix used across all plots/files
title_suffix <- paste0("(ndays = ", ndays_focus, ", zone = ", zone_focus, ")")

# Convenience: wrap a base title with the suffix
title_ndz <- function(txt) paste(txt, title_suffix)


season_pal <- c(
  "Winter" = "#2C7BB6",   # blue
  "Spring" = "#1FA187",   # green
  "Summer" = "#FDB863",   # warm yellow/orange
  "Fall"   = "#D7191C"    # red
)

sub_region_pal <- c(
  "Biscayne Bay" = "#1b9e77",
  "Upper Keys"   = "#d95f02",
  "Middle Keys"  = "#7570b3",
  "Lower Keys"   = "#e7298a"
)

# SiteID often has many levels → generate on demand, but stable for an explicit set
# make_site_pal <- function(levels) {grDevices::setNames(grDevices::hcl.colors(length(levels), palette = "Dark3"), levels)}
SITE_PAL_MASTER <- setNames(hcl.colors(length(site_levels), palette = "Dark3"), site_levels)

1.5 Helper functions

## ---- helpers ----
# ---- x-axis alignment helpers ----

# Build one canonical x order across multiple data frames
# x_col: name of the x column (string). For sites, use "SiteID".
# canonical: optional vector to enforce a preferred global order (e.g., site_levels)
build_common_x_levels <- function(dfs, x_col = "SiteID", canonical = NULL) {
  # all unique values present across data frames
  vals <- Reduce(union, lapply(dfs, function(d) as.character(d[[x_col]])))
  # keep canonical order if provided; otherwise sorted unique
  if (!is.null(canonical)) {
    vals <- intersect(canonical, vals)
  } else {
    vals <- sort(vals)
  }
  vals
}

# Apply the same factor levels to one data frame
apply_x_levels <- function(df, x_col = "SiteID", levels_vec) {
  df[[x_col]] <- factor(df[[x_col]], levels = levels_vec)
  df
}

# Use the same discrete x-scale everywhere (limits + expansion)
scale_x_locked <- function(levels_vec, expand_add = 0.6, drop = FALSE) {
  ggplot2::scale_x_discrete(limits = levels_vec, drop = drop,
                            expand = ggplot2::expansion(add = expand_add))
}

# ----- Reusable scale helpers (keep your plotting code clean) -----

## season
scale_color_season <- function(...) {
  ggplot2::scale_color_manual(values = season_pal, limits = names(season_pal), ...)
}
scale_fill_season <- function(...) {
  ggplot2::scale_fill_manual(values = season_pal, limits = names(season_pal), ...)
}

## Subregion
scale_color_subregion <- function(...) {
  ggplot2::scale_color_manual(values = sub_region_pal, limits = names(sub_region_pal), ...)
}
scale_fill_subregion <- function(...) {
  ggplot2::scale_fill_manual(values = sub_region_pal, limits = names(sub_region_pal), ...)
}

## Site
# Helper to grab just the sites present in a given data frame, but keeping the global order from `site_levels`
present_sites <- function(data) {
  x <- as.character(droplevels(data$SiteID))
  present <- unique(stats::na.omit(x))
  intersect(site_levels, present)  # keep global order, drop unused
}

# ----- Scales that auto-subset the master map to the data -----
scale_color_site <- function(data, ...) {
  lvls <- present_sites(data)
  vals <- SITE_PAL_MASTER[lvls]
  scale_color_manual(values = vals, limits = lvls, ...)
}

scale_fill_site <- function(data, ...) {
  lvls <- present_sites(data)
  vals <- SITE_PAL_MASTER[lvls]
  scale_fill_manual(values = vals, limits = lvls, ...)
}


# Non-leap base so 366 isn't induced by the calendar conversion
season_from_jday <- function(j) {
  d <- ymd("2023-01-01") + days(j - 1)
  m <- month(d)
  case_when(
    m %in% c(12, 1, 2) ~ "Winter",
    m %in% 3:5         ~ "Spring",
    m %in% 6:8         ~ "Summer",
    TRUE               ~ "Fall"
  )
}

fill_edges <- function(x) {
  if (length(x) == 0 || all(is.na(x))) return(x)
  i1 <- which(!is.na(x))[1]; i2 <- tail(which(!is.na(x)), 1)
  if (!is.na(i1) && i1 > 1) x[1:(i1 - 1)] <- x[i1]
  if (!is.na(i2) && i2 < length(x)) x[(i2 + 1):length(x)] <- x[i2]; x
}

# LOESS per site, predict daily on per-site grid (365 or 366)
fit_loess_and_predict_daily <- function(df_site, span = 0.3, window_days = 1) {
  d <- df_site %>%
    tidyr::drop_na(jday, slope) %>%
    dplyr::mutate(
      jday = as.integer(jday) |> pmin(366L) |> pmax(1L)
    )

  if (nrow(d) < 5) return(tibble(jday = integer(), pred_per_day = numeric()))

  wd <- window_days                  # <— capture scalar outside mutate()

  grid_days <- 1:365
  newd <- tibble(jday = grid_days)
  mod <- loess(slope / wd ~ jday, data = d, span = span, degree = 1,
               control = loess.control(surface = "direct"))
  preds <- as.numeric(predict(mod, newdata = newd)) %>% fill_edges()
  newd %>% mutate(pred_per_day = preds)
}


# fit_loess_and_predict_daily <- function(df_site, span = 0.3, window_days = 1) {
#   d <- df_site %>%
#     tidyr::drop_na(jday, slope) %>%                       # drop any rows missing jday or slope
#     dplyr::mutate(
#       jday = as.integer(jday) |> pmin(366L) |> pmax(1L),  # clamp to [1, 366] #jday should always be an integer value between 1 and 366 (really 365 since we used a non-leap year)
#       slope_per_day = slope / window_days                 # convert slopes into per-day values (this is in case we have to account for ndays later)
#     )
#   if (nrow(d) < 5) return(tibble(jday = integer(), pred_per_day = numeric())) # require at least 5 points to make a fit
#   grid_days <-  1:365 #force 365 days
#     # grid_days <- if (any(d$jday == 366L, na.rm = TRUE)) 1:366 else 1:365
#   newd      <- tibble(jday = grid_days)
#   mod <- loess(slope_per_day ~ jday, data = d, span = span, degree = 1,control = loess.control(surface = "direct")) #fit smoothed trend (Loess, slope v. jday)
#   preds <- as.numeric(predict(mod, newdata = newd)) %>% fill_edges()  #predict a slope value for every day of the year
#   newd %>% mutate(pred_per_day = preds)
# }

# Optional: tiny helper to attach tags (Sub_region, SiteID) to a data frame
add_tags <- function(df, tags) {
  if (!nrow(tags)) return(df)
  dplyr::bind_cols(tags[rep(1, nrow(df)), , drop = FALSE], df)
}

# ---- Seasonal integration of annual LOESS with optional bootstrap ----
integrate_seasonal_loess <- function(df_site,
                                     season_calendar,
                                     span        = 0.3,
                                     window_days = 1,
                                     B           = 0,
                                     seed        = NULL,
                                     return_reps = FALSE) {

  stopifnot(all(c("jday","slope") %in% names(df_site)))

  # tags (explicit, no tidyselect)
  tags <- df_site %>%
    dplyr::select(Sub_region, SiteID) %>%
    dplyr::distinct()

  # ---- point estimate from full data ----
  preds <- fit_loess_and_predict_daily(df_site, span = span, window_days = window_days)

  if (nrow(preds) == 0) {
    point <- tibble::tibble(
      Season        = factor(character(), levels = season_levels),
      DIC_season_ha = numeric()
    )
  } else {
    point <- preds %>%
      dplyr::left_join(season_calendar, by = "jday") %>%
      dplyr::transmute(
        Season     = factor(Season_cal, levels = season_levels),
        contrib_ha = pred_per_day / 100
      ) %>%
      dplyr::group_by(Season) %>%
      dplyr::summarise(DIC_season_ha = sum(contrib_ha, na.rm = TRUE), .groups = "drop")
  }

  if (nrow(tags)) point <- add_tags(point, tags)

  # ---- optional bootstrap (case bootstrap of rows) ----
  if (B <= 0 || nrow(df_site) < 5) {
    return(list(point = point, stats = NULL, reps = NULL))
  }

  if (!is.null(seed)) set.seed(seed)
  n <- nrow(df_site)
  reps <- vector("list", B)

  for (b in seq_len(B)) {
    boot_df <- dplyr::slice_sample(df_site, n = n, replace = TRUE)
    p <- fit_loess_and_predict_daily(boot_df, span = span, window_days = window_days)

    if (nrow(p) == 0) {
      tmp <- tibble::tibble(
        Season         = factor(season_levels, levels = season_levels),
        DIC_season_ha  = NA_real_,
        rep            = b
      )
    } else {
      tmp <- p %>%
        dplyr::left_join(season_calendar, by = "jday") %>%
        dplyr::transmute(
          Season     = factor(Season_cal, levels = season_levels),
          contrib_ha = pred_per_day / 100
        ) %>%
        dplyr::group_by(Season) %>%
        dplyr::summarise(DIC_season_ha = sum(contrib_ha, na.rm = TRUE), .groups = "drop") %>%
        dplyr::mutate(rep = b)
    }

    if (nrow(tags)) tmp <- add_tags(tmp, tags)
    reps[[b]] <- tmp
  }

  reps <- dplyr::bind_rows(reps)

  stats <- reps %>%
    dplyr::group_by(Sub_region, SiteID, Season) %>%
    dplyr::summarise(
      mean_season_ha = mean(DIC_season_ha, na.rm = TRUE),
      sd_season_ha   = sd(DIC_season_ha,   na.rm = TRUE),
      lwr95          = stats::quantile(DIC_season_ha, 0.025, na.rm = TRUE),
      upr95          = stats::quantile(DIC_season_ha, 0.975, na.rm = TRUE),
      .groups = "drop"
    )

  list(point = point, stats = stats, reps = if (return_reps) reps else NULL)
}

# ---- Run for all sites in INx ----
integrate_all_sites <- function(INx,
                                season_calendar,
                                span        = loess_span,
                                window_days = 1,
                                B           = 0,
                                seed        = 123,
                                return_reps = FALSE) {

  site_list <- INx %>%
    dplyr::select(Sub_region, SiteID, jday, slope) %>%
    dplyr::group_by(Sub_region, SiteID) %>%
    dplyr::group_split()

  out <- lapply(site_list, function(df_site) {
    integrate_seasonal_loess(df_site,
      season_calendar = season_calendar,
      span = span,
      window_days = window_days,
      B = B,
      seed = seed,
      return_reps = return_reps
    )
  })

  point_all <- dplyr::bind_rows(lapply(out, `[[`, "point"))
  stats_all <- dplyr::bind_rows(lapply(out, `[[`, "stats"))
  reps_all  <- if (return_reps) dplyr::bind_rows(lapply(out, `[[`, "reps")) else NULL

  list(point = point_all, stats = stats_all, reps = reps_all)
}

2 Load Data

2.1 Slopes:

slopes_file <- file.path("/Users/heidi.k.hirsh/Documents/GitHub/Habitat_Sensitivity_FLK_v2/Tables/Slopes","slopes_per_visit_all_ndays_20250903_1548.csv")
message("Reading slopes from: ", slopes_file)
slopes_all <- read_csv(slopes_file, show_col_types = FALSE)
# names(slopes_all)


slopes_all <- slopes_all %>%
  mutate(
    visitID     = trimws(as.character(visitID)),
    ndays       = as.integer(ndays),
    # habitat_col = trimws(as.character(habitat_col)),
    Zone        = factor(trimws(as.character(Zone)), levels = zone_levels),
    Season      = factor(Season, levels = season_levels),
    Sub_region  = factor(Sub_region, levels = subreg_levels),
    SiteID      = factor(as.character(SiteID), levels = site_levels)
  )

2.2 Carbonate chemistry dataframe (including areas)

# ---- 2) join CCc and make focus subset ----
CCc <- read_csv(file.path(project_dir, cc_path_rel), show_col_types = FALSE) %>%
  mutate(
    visitID    = as.character(visitID),
    ndays      = as.integer(ndays),
    Sub_region = factor(
      Sub_region,
      levels = subreg_levels,
      labels = c("Biscayne Bay", "Upper Keys", "Middle Keys", "Lower Keys")
    ),
    Season     = factor(Season, levels = season_levels),
    SiteID     = factor(as.character(SiteID), levels = site_levels),
    Zone       = factor(as.character(Zone), levels = zone_levels)
  )

#join with slopes:
CCc_with_slopes <- CCc %>%
  left_join(slopes_all, by = c("visitID","ndays"))

# Fix any duplicate columns
CCc_with_slopes <- CCc_with_slopes %>%
  select(-ends_with(".y")) %>%
  rename_with(~ sub("\\.x$", "", .x), ends_with(".x"))

# names(CCc_with_slopes)
## --- Canonical, contiguous season calendar (independent of IN) ---
season_calendar <- tibble(jday = 1:365) %>%
  mutate(
    Season_cal = cut(
      jday,
      breaks = c(0, 90, 182, 273, 365),   # 1–90, 91–182, 183–273, 274–365
      labels = c("Winter","Spring","Summer","Fall"),
      include.lowest = TRUE, right = TRUE
    ) |> factor(levels = season_levels)
  )

# --- season midpoints for labels ---
season_bounds <- tibble(
  Season = factor(c("Winter","Spring","Summer","Fall"),
                  levels = c("Winter","Spring","Summer","Fall")),
  j_start = c(1, 91, 183, 274),
  j_end   = c(90, 182, 273, 365)
) %>%
  mutate(
    j_mid = floor((j_start + j_end)/2),
    x_mid = as.Date("2024-01-01") + (j_mid - 1)
)


## --- Attach both seasons + choose which to use + provenance flag ---
attach_seasons <- function(df, cal = season_calendar,
                           prefer = c("IN", "calendar")) {
  prefer <- match.arg(prefer)
  out <- df %>%
    mutate(jday = as.integer(jday)) %>%
    left_join(cal, by = "jday") %>%
    mutate(
      Season_IN  = if (is.null(.$Season)) factor(NA, levels = season_levels) else factor(Season, levels = season_levels),
      Season_cal = factor(Season_cal, levels = season_levels),
      Season_used = case_when(
        prefer == "IN"       & !is.na(Season_IN)  ~ Season_IN,
        prefer == "calendar" & !is.na(Season_cal) ~ Season_cal,
        # fallback if preferred missing:
        is.na(Season_IN)   & !is.na(Season_cal) ~ Season_cal,
        !is.na(Season_IN)  & is.na(Season_cal) ~ Season_IN,
        TRUE ~ NA_character_
      ) |> factor(levels = season_levels),
      season_source = case_when(
        is.na(Season_used) ~ "unknown",
        identical(Season_used, Season_IN)  & !is.na(Season_IN)  ~ "IN",
        identical(Season_used, Season_cal) & !is.na(Season_cal) ~ "calendar",
        TRUE ~ "mixed"
      )
    )
  out
}

2.3 Focus subset for exploring sensitivity (change ndays and zone in the settings)

IN <- CCc_with_slopes %>%
  filter(ndays == ndays_focus, Zone == zone_focus) %>% 
  mutate(jdate = ymd("2024-01-01") + days(jday - 1))

INx <- attach_seasons(IN, season_calendar, prefer = "IN") %>%
  mutate(jdate = as.Date("2024-01-01") + (jday - 1))
## first version labels x axis with jday number (not month)
# ggplot(IN, aes(x = jday, y = slope, color = Sub_region)) +
#   geom_point(alpha = 0.5) +
#   geom_smooth(method = "loess", se = TRUE, span = 0.3) +
#   scale_color_subregion() +                         # NEW
#   labs(
#     # title = "Slope of ΔDIC vs Julian day (ndays = 5, Inshore)",
#     title = title_ndz("Slope of ΔDIC vs Julian day"),
#     x = "Julian day",
#     y = expression(paste("Slope (", Delta,"DIC ", mu,"mol kg"^{-1}, " km"^{-2}, ")"))
#   ) +
#   facet_wrap(~ SiteID, scales = "free_y") +
#   theme_bw()

B <- ggplot(IN, aes(x = jdate, y = slope, color = Sub_region, fill = Sub_region)) +
  geom_point(alpha = 0.5) +
  geom_hline(yintercept = 0, color = "black", linewidth = 0.3) +
  geom_smooth(method = "loess", se = TRUE, span = 0.3, alpha = 0.2) +  # alpha controls shading
  scale_color_subregion() +
  scale_fill_subregion() +  # match fills to line colors
  scale_x_date(
    date_breaks = "1 month",
    labels = function(x) substr(as.character(lubridate::month(x, label = TRUE, abbr = TRUE)), 1, 1)
  ) +
  labs(
    title = title_ndz("Slope of ΔDIC vs Julian day"),
    x = "Month",
    y = expression(paste("Slope (", Delta,"DIC ", mu,"mol kg"^{-1}, " km"^{-2}, ")"))
  ) +
  facet_wrap(~ SiteID) +
  theme_bw()

B

C <- ggplot(IN, aes(x = jdate, y = slope)) +
  # points colored by season
  geom_point(aes(color = Season), alpha = 0.5) +
  geom_hline(yintercept = 0, color = "black", linewidth = 0.3) +
  
  # smooth lines by subregion (no ribbon)
  geom_smooth(aes(color = Sub_region),
              method = "loess", se = FALSE, span = 0.3) +
  
  scale_color_season() +
  scale_fill_subregion() +  # you could also drop this since fill isn’t used anymore
  scale_x_date(
    date_breaks = "1 month",
    labels = function(x) substr(as.character(lubridate::month(x, label = TRUE, abbr = TRUE)), 1, 1)
  ) +
  labs(
    title = title_ndz("Slope of ΔDIC vs Julian day"),
    x = "Month",
    y = expression(paste("Slope (", Delta,"DIC ", mu,"mol kg"^{-1}, " km"^{-2}, ")"))
  ) +
  facet_wrap(~ SiteID,scales = "free_y") +
  theme_bw()
C

# D <- ggplot(IN, aes(x = jdate, y = slope, color = Season, fill = Sub_region)) +
#   geom_point(alpha = 0.5) +
#   geom_smooth(method = "loess", se = TRUE, span = 0.3, alpha = 0.2) +  # alpha controls shading
#   scale_color_season() +
#   scale_fill_subregion() +  # match fills to line colors
#   scale_x_date(
#     date_breaks = "1 month",
#     labels = function(x) substr(as.character(lubridate::month(x, label = TRUE, abbr = TRUE)), 1, 1)
#   ) +
#   labs(
#     title = title_ndz("Slope of ΔDIC vs Julian day"),
#     x = "Month",
#     y = expression(paste("Slope (", Delta,"DIC ", mu,"mol kg"^{-1}, " km"^{-2}, ")"))
#   ) +
#   # facet_wrap(~ SiteID) +
#   facet_wrap(~ SiteID, scales = "free_y") +
#   theme_bw()
# D

## ultimately we should look at seasonality using jday level data (for now just do quarterly)


p_jday <- ggplot(IN, aes(x = jday, y = slope, color = SiteID, fill = SiteID)) +
  geom_hline(yintercept = 0, linewidth = 0.3, color = "black") +
  geom_point(alpha = 0.7) +
  geom_smooth(method = "loess", se = TRUE, span = 0.3, alpha = 0.2) +
  scale_color_site(IN) +
  scale_fill_site(IN) +  # ensure ribbon fill matches line colors
  scale_x_continuous(
    breaks = c(15,46,75,105,135,166,196,227,258,288,319,349),
    labels = month.abb
  ) +
  labs(
    x = "Month",
    y = expression(paste("Slope (", Delta, "DIC per km"^2, ")")),
    title = title_ndz("Slope vs Julian day")
  ) +
  facet_wrap(~ Sub_region, ncol = 1) +
  # coord_cartesian(ylim = c(-2, 1)) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
p_jday

3 Seasonal and Annual Rates — Quarterly Approach

This section uses seasonal medians (Winter, Spring, Summer, Fall) based on Season in CCc.

#Seasonal median delta 
season_days <- 365.25/4 #91.3125
# SSm <- IN %>%
#   group_by(Sub_region, SiteID, Season) %>%
#   summarise(mddDIC = median(slope, na.rm = TRUE), .groups = "drop") %>%
#   mutate(mddDIC91 = mddDIC * season_days)

#now sesaonal summary carries how many obs came from each source
SSm <- INx %>%
  group_by(Sub_region, SiteID, Season_used) %>%
  summarise(
    mddDIC      = median(slope, na.rm = TRUE),
    n_points    = sum(!is.na(slope)),
    n_from_IN   = sum(season_source == "IN"       & !is.na(slope)),
    n_from_cal  = sum(season_source == "calendar" & !is.na(slope)),
    .groups = "drop"
  ) %>%
  rename(Season = Season_used) %>%
  mutate(mddDIC91 = mddDIC * season_days)

# --- labels for Quarterly seasonal totals (per ha) ---
lab_quarterly <- SSm %>%
  mutate(DIC_season_ha = mddDIC91/100) %>%
  left_join(season_bounds, by = "Season") %>%
  group_by(SiteID) %>%
  mutate(
    y_top  = max(INx$slope[INx$SiteID == first(SiteID)], na.rm = TRUE),
    y_bot  = min(INx$slope[INx$SiteID == first(SiteID)], na.rm = TRUE),
    y_lab_q = if_else(DIC_season_ha >= 0, 0.65*y_top, 0.65*y_bot), # a bit closer to 0 so labels don't overlap
    lab_q   = scales::number(DIC_season_ha, accuracy = 0.01)
  ) %>% ungroup()



# summary(SSm)#no season data from Season_cal here

#Annual totals per site
#Also calculate how much seagrass is needed to offset OA signal (+5.35 DIC/year)
nAnn <- SSm %>%
  group_by(Sub_region, SiteID) %>%
  summarise(
    nAnndDIC_km2 = sum(mddDIC91, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    nAnndDIC_m2  = nAnndDIC_km2 / 1e6,      # convert from km² to m²
    SGm2_offOA   = -5.35 / nAnndDIC_m2,     # money calc (Ana/Ian 2023)
    SG_HA_offOA  = SGm2_offOA / 1e4,        # per hectare (1 ha = 10,000 m²)
    nAnndDIC_ha  = nAnndDIC_km2 / 100       # annual ΔDIC per hectare
  )
  
#caution for divide by 0 scenarios
nAnn <- nAnn %>%
  mutate(
    SGm2_offOA  = if_else(is.infinite(SGm2_offOA), NA_real_, SGm2_offOA),
    SG_HA_offOA = if_else(is.infinite(SG_HA_offOA), NA_real_, SG_HA_offOA)
  )

#for some reason per ha values are huge
summary(nAnn$nAnndDIC_ha)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -2.037618 -1.094904 -0.756063 -0.777381 -0.204771 -0.008653
# table(nAnn$nAnndDIC_m2,nAnn$SG_HA_offOA)

# Scatter plot: required hectares vs annual ΔDIC per m²

x= ggplot(nAnn, aes(x = nAnndDIC_m2, y = SG_HA_offOA, color = Sub_region)) +
  geom_hline(yintercept = 0, linewidth = 0.3) +
  geom_vline(xintercept = 0, linewidth = 0.3, linetype = "dashed") +
  geom_point(size = 3, alpha = 0.7) +
  scale_color_subregion() +                         # NEW
  scale_y_continuous(labels = scales::comma, limits = c(0, NA)) +
  scale_x_continuous(labels = scales::scientific) +
  labs(
    title = "Diagnostic: Required hectares vs. annual ΔDIC per m²",
    x = expression(paste("Annual ", Delta, "DIC per m"^2)),
    y = "Required seagrass area (ha)"
  ) +
  theme_bw()
x

# z= ggplot(nAnn, aes(x = nAnndDIC_ha, y = SG_HA_offOA, color = Sub_region)) +
#   geom_hline(yintercept = 0, linewidth = 0.3) +
#   geom_vline(xintercept = 0, linewidth = 0.3, linetype = "dashed") +
#   geom_point(size = 3, alpha = 0.7) +
#   scale_color_subregion() +                         # NEW
#   scale_y_continuous(labels = scales::comma, limits = c(0, NA)) +
#   scale_x_continuous(labels = scales::scientific) +
#   labs(
#     title = "Diagnostic: Required hectares vs. annual ΔDIC per ha",
#     x = expression(paste("Annual ", Delta, "DIC per ha")),
#      # x = expression(paste("Annual ", Delta, "DIC per m"^2)),
#     y = "Required seagrass area (ha)"
#   ) +
#   theme_bw()
# z
loess_fits <- IN %>%
  group_by(Sub_region, SiteID) %>%
  group_split() %>%
  map(~ loess(slope ~ jday, data = ., span = 0.3))

# Name the list by SiteID for clarity
names(loess_fits) <- IN %>% distinct(SiteID) %>% pull()



# Save the whole list of models
out_name <- paste0("loess_models_ndays", ndays_focus, "_", zone_safe, "_", stamp, ".rds")
out_path <- file.path("Tables", "AnnualRates", out_name)

saveRDS(loess_fits, out_path)
message("Saved loess models to: ", out_path)
# --- Save outputs ------------------------------------------------------
write_csv(SSm, file.path(out_dir, paste0("seasonal_median_slopes_ndays", ndays_focus,"_", zone_safe, "_", stamp, ".csv")))
write_csv(nAnn,file.path(out_dir, paste0("annual_rates_ndays", ndays_focus,"_", zone_safe, "_", stamp, ".csv")))
saveRDS(list(seasonal = SSm, annual = nAnn), file.path(out_dir, paste0("annual_rates_ndays", ndays_focus,"_", zone_safe, "_", stamp, ".rds")))

message("Saved seasonal + annual tables for ndays = ", ndays_focus,", zone = ", zone_focus, " to: ", slopes_dir)

4 Plots for quarterly approach

# ======================= PLOT 1: Annual ΔDIC ============================
p1 <- ggplot(nAnn, aes(x = SiteID, y = nAnndDIC_ha, fill = Sub_region)) +
  geom_hline(yintercept = 0, linewidth = 0.3) +
  geom_col() +
  scale_fill_subregion() +                          # NEW
  labs(
    # title = "Annual ΔDIC per hectare seagrass (ndays = 5, Inshore)",
    title = title_ndz("Annual ΔDIC per hectare seagrass"),
    # x = "SiteID",
    x=' ',
    y = expression(paste("Annual ", Delta,"DIC (",mu,"mol kg"^{-1},") per ha")),
    fill = "Sub-region"
  ) +
  scale_y_continuous(labels = scales::label_number(big.mark = ",")) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
p1

p2 <- ggplot(nAnn, aes(x = SiteID, y = SG_HA_offOA, fill = Sub_region)) +
  geom_col() +
  scale_fill_subregion() +                      
  scale_y_break(c(40, 600)) +
  geom_text(
    aes(label = scales::comma(SG_HA_offOA, accuracy = 0.1)),
    vjust = 1.2, color = "white", size = 3
  ) +
    labs(
       title = title_ndz("Seagrass area (hectares) required to offset OA"),
       y="Seagrass Area (hectares)", x=' ') +
  theme_bw()
p2

# Convert seasonal totals to per-hectare
season_rates <- SSm %>%
  mutate(
    DIC_season_km2 = mddDIC91,       # seasonal ΔDIC per km²
    DIC_season_ha  = DIC_season_km2 / 100
  )

# keep a stable SiteID order if not already a factor
if (!is.factor(season_rates$SiteID)) {
  season_rates <- season_rates %>%
    mutate(
      SiteID = factor(SiteID, levels = sort(unique(SiteID))),
      Season = factor(Season, levels = c("Winter", "Spring", "Summer", "Fall")),
      Season = fct_relevel(Season, "Fall", "Winter", "Spring", "Summer")
    )
}

# Net annual per site (for labels)
annual_rates <- season_rates %>%
  group_by(Sub_region, SiteID) %>%
  summarise(annual_DIC_ha = sum(DIC_season_ha, na.rm = TRUE), .groups = "drop")

# Find a common label height (e.g., 5% above the tallest bar)
y_max <- max(annual_rates$annual_DIC_ha, na.rm = TRUE) * -20

p_stacked <- ggplot(season_rates,
                    aes(x = SiteID, y = DIC_season_ha, fill = Season)) +
  geom_hline(yintercept = 0, linewidth = 0.3) +
  geom_col(position = "stack") +
  scale_fill_season() +                             # NEW
  geom_text(data = annual_rates,
            aes(x = SiteID, label = round(annual_DIC_ha, 2)),
            y = y_max, vjust = 0, size = 3,
            inherit.aes = FALSE) +
  labs(
    # title = "Seasonal ΔDIC per hectare of seagrass (ndays = 5, Inshore)",
    title = title_ndz("Seasonal ΔDIC per hectare of seagrass"),
    x = "SiteID",
    y = expression(paste("ΔDIC (",mu,"mol kg"^{-1},") per ha")),
    fill = "Season"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
p_stacked

# Grouped bars (side-by-side seasons) per site (per hectare)
p_grouped <- ggplot(season_rates,
                    aes(x = SiteID, y = DIC_season_ha, fill = Season)) +
  geom_hline(yintercept = 0, linewidth = 0.3) +
  geom_col(position = position_dodge(width = 0.7), width = 0.6) +
  scale_fill_season() +                             # NEW
  labs(
    title =title_ndz("Seasonal (quarterly) ΔDIC per hectare by site"),
    x = "SiteID",
    y = expression(paste("Seasonal ", Delta,"DIC (",mu,"mol kg"^{-1},") per ha")),
    fill = "Season"
  ) +
  scale_y_continuous(labels = scales::label_number(big.mark = ",")) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
p_grouped

5 Seasonal and Annual Rates — Jday (Enhanced Seasonal Resolution)

This section uses LOESS fits by Julian day to generate daily (or weekly) resolution contributions, then sums to seasons or years.

# season_calendar <- IN %>%
#   transmute(jday = as.integer(jday),
#             Season = factor(Season, levels = season_levels)) %>%
#   count(jday, Season, sort = TRUE) %>%
#   group_by(jday) %>%
#   slice_max(n, n = 1, with_ties = FALSE) %>%
#   ungroup() %>%
#   # ensure all 1:365 exist; fill any missing with month→season mapping
#   right_join(tibble(jday = 1:365), by = "jday") %>%
#   mutate(
#     Season = if_else(
#       is.na(Season),
#       factor(season_from_jday(jday), levels = season_levels),
#       Season,
#       missing = factor(NA, levels = season_levels)  # ensures type consistency
#     )
#   )
# 
# ##--- fit one LOESS per site across the year; DO NOT carry Season here ---
# daily_preds <- IN %>%
#   transmute(Sub_region, SiteID,
#             jday  = pmin(pmax(as.integer(jday), 1L), 365L),
#             slope) %>%
#   group_by(Sub_region, SiteID) %>%
#   group_modify(~ fit_loess_and_predict_daily(.x, span = loess_span,
#                                              window_days = window_days)) %>%
#   ungroup() %>%
#   # attach the single Season column from the calendar (no duplicates)
#   left_join(season_calendar, by = "jday") %>%
#   mutate(
#     contrib_ha = pred_per_day / 100,
#     Season     = factor(Season, levels = season_levels)
#   )


# # Deterministic season calendar: contiguous blocks, no dependence on IN
# season_calendar <- tibble(jday = 1:365) %>%
#   mutate(
#     Season = cut(
#       jday,
#       breaks = c(0, 90, 182, 273, 365),         # 1–90, 91–182, 183–273, 274–365
#       labels = c("Winter","Spring","Summer","Fall"),
#       include.lowest = TRUE, right = TRUE
#     ),
#     Season = factor(Season, levels = season_levels)
#   )
# summary(season_calendar)
# 
# 
# daily_preds <- IN %>%
#   transmute(Sub_region, SiteID,
#             jday  = pmin(pmax(as.integer(jday), 1L), 365L),
#             slope) %>%
#   group_by(Sub_region, SiteID) %>%
#   group_modify(~ fit_loess_and_predict_daily(.x, span = loess_span,
#                                              window_days = window_days)) %>%
#   ungroup() %>%
#   left_join(season_calendar, by = "jday") %>%
#   mutate(
#     contrib_ha = pred_per_day / 100,
#     Season     = factor(Season, levels = season_levels)
#   )

# Deterministic season calendar (contiguous, independent of IN)
season_calendar <- tibble(jday = 1:365) %>%
  mutate(
    Season_cal = cut(
      jday,
      breaks = c(0, 90, 182, 273, 365),   # 1–90, 91–182, 183–273, 274–365
      labels = c("Winter","Spring","Summer","Fall"),
      include.lowest = TRUE, right = TRUE
    ),
    Season_cal = factor(Season_cal, levels = season_levels)
  )

# Season bounds + handy dates (drop near where you define season_calendar)
season_bounds <- tibble(
  Season = factor(c("Winter","Spring","Summer","Fall"),
                  levels = c("Winter","Spring","Summer","Fall")),
  j_start = c(1, 91, 183, 274),
  j_end   = c(90, 182, 273, 365)
) %>%
  mutate(
    x_start = as.Date("2024-01-01") + (j_start - 1),
    x_end   = as.Date("2024-01-01") + (j_end   - 1),
    j_mid   = floor((j_start + j_end)/2),
    x_mid   = as.Date("2024-01-01") + (j_mid - 1)
  )


# --- LOESS predictions (unchanged), but attach Season_cal (not Season)
# daily_preds <- IN %>%
#   transmute(Sub_region, SiteID,
#             jday  = pmin(pmax(as.integer(jday), 1L), 365L),
#             slope) %>%
#   group_by(Sub_region, SiteID) %>%
#   group_modify(~ fit_loess_and_predict_daily(.x, span = loess_span,
#                                              window_days = window_days)) %>%
#   ungroup() %>%
#   left_join(season_calendar, by = "jday") %>%     # adds Season_cal
#   mutate(
#     contrib_ha = pred_per_day / 100
#   )

daily_preds <- INx %>%
  transmute(Sub_region, SiteID,
            jday = pmin(pmax(as.integer(jday), 1L), 365L),
            slope) %>%
  group_by(Sub_region, SiteID) %>%
  group_modify(~ fit_loess_and_predict_daily(.x, span = loess_span,
                                             window_days = window_days)) %>%
  ungroup() %>%
  left_join(season_calendar, by = "jday") %>%
  mutate(
    contrib_ha    = pred_per_day / 100,
    Season_cal    = factor(Season_cal, levels = season_levels),
    Season_used   = Season_cal,           # clear/explicit
    season_source = "calendar"
  )

# seasonal_jday <- daily_preds %>%
#   group_by(Sub_region, SiteID, Season_used) %>%
#   summarise(DIC_season_ha = sum(contrib_ha, na.rm = TRUE), .groups = "drop") %>%
#   rename(Season = Season_used)

# ---- Integrate annual LOESS into seasonal totals (with bootstrap if desired) ----
# res_boot <- integrate_all_sites(INx, season_calendar, B = 300, seed = 123)
res_boot <- integrate_all_sites(INx, season_calendar, window_days = window_days, B = 300, seed = 123)
seasonal_jday <- res_boot$point    # point estimates (like your old seasonal_jday)
boot_stats     <- res_boot$stats   # 95% CI per season


# --- labels for LOESS seasonal integrals (per ha) ---
lab_jday <- seasonal_jday %>%
  left_join(season_bounds, by = "Season") %>%
  group_by(SiteID) %>%
  mutate(
    y_top = max(INx$slope[INx$SiteID == first(SiteID)], na.rm = TRUE),
    y_bot = min(INx$slope[INx$SiteID == first(SiteID)], na.rm = TRUE),
    y_lab = if_else(DIC_season_ha >= 0, 0.85*y_top, 0.85*y_bot),
    lab   = scales::number(DIC_season_ha, accuracy = 0.01)
  ) %>% ungroup()

lab_jday_err <- boot_stats %>%
  left_join(season_bounds %>% dplyr::select(Season, x_start, x_end, x_mid), by = "Season") %>%
  mutate(
    y_low  = lwr95,
    y_high = upr95
  )

# --- seasonal totals using that single Season column ---
# seasonal_jday <- daily_preds %>%
#   group_by(Sub_region, SiteID, Season_cal) %>%
#   summarise(DIC_season_ha = sum(contrib_ha, na.rm = TRUE), .groups = "drop")

annual_jday <- seasonal_jday %>%
  group_by(Sub_region, SiteID) %>%
  summarise(annual_DIC_ha = sum(DIC_season_ha, na.rm = TRUE), .groups = "drop")
library(dplyr)
library(purrr)
library(tibble)

loess_ci_by_site <- function(df_site, span = loess_span, win_days = window_days) {
  d <- df_site %>%
    tidyr::drop_na(jday, slope) %>%
    mutate(
      jday = pmax(pmin(as.integer(jday), 365L), 1L),
      slope_per_day = slope / win_days
    )

  if (nrow(d) < 5) return(tibble())  # not enough data to fit

  newd <- tibble(jday = 1:365)
  mod <- loess(slope_per_day ~ jday, data = d, span = span, degree = 1,
               control = loess.control(surface = "direct"))
  pr  <- predict(mod, newdata = newd, se = TRUE)

  tibble(
    SiteID = d$SiteID[1],
    jday   = newd$jday,
    jdate  = as.Date("2024-01-01") + (newd$jday - 1),
    fit    = as.numeric(pr$fit),
    lwr    = as.numeric(pr$fit) - 1.96 * as.numeric(pr$se.fit),
    upr    = as.numeric(pr$fit) + 1.96 * as.numeric(pr$se.fit)
  )
}

# Build CI lines for all sites shown in p_overlay
ci_all <- INx %>%
  group_by(SiteID) %>%
  group_split() %>%
  map_dfr(~ loess_ci_by_site(.x, span = loess_span, win_days = window_days))


# Per-day rate for each (Sub_region, SiteID, Season) from the quarterly method
# SSm_perday <- SSm %>%
#   transmute(Sub_region, SiteID, Season, rate_per_day = mddDIC)
# 
# # Calendar with Date for plotting
# calendar_base <- season_calendar %>%
#   transmute(
#     jday,
#     Season_cal,
#     jdate = as.Date("2024-01-01") + (jday - 1)
#   )
# 
# # Replicate per-day seasonal rate across every jday WITHIN THAT SEASON (many-to-many on purpose)
# # join by season name: Season (from SSm_perday) ↔ Season_cal (from calendar)
# quarterly_daily <- calendar_base %>%
#   inner_join(
#     SSm_perday %>% rename(Season_cal = Season),
#     by = "Season_cal",
#     relationship = "many-to-many"
#   )


SSm_perday <- SSm %>% transmute(Sub_region, SiteID, Season, rate_per_day = mddDIC)

calendar_base <- season_calendar %>%
  transmute(jday, Season_cal, jdate = as.Date("2024-01-01") + (jday - 1))

quarterly_daily <- calendar_base %>%
  inner_join(SSm_perday %>% rename(Season_cal = Season), by = "Season_cal",
             relationship = "many-to-many")

daily_preds_plot <- daily_preds %>%
  mutate(jdate = as.Date("2024-01-01") + (jday - 1),
         rate_per_day = pred_per_day)

# --- signed seasonal area under LOESS curve (relative to y=0) ---
loess_shade <- daily_preds %>%
  mutate(
    jdate  = as.Date("2024-01-01") + (jday - 1),
    Season = factor(Season_cal, levels = season_levels),
    ymin   = pmin(pred_per_day, 0),
    ymax   = pmax(pred_per_day, 0)
  ) %>%
  select(SiteID, Season, jdate, ymin, ymax)

p_overlay <- ggplot() +
  geom_col(data = quarterly_daily,
           # aes(jdate, rate_per_day, color = Season_cal),
           aes(jdate, rate_per_day, fill = Season_cal),
           width = 1, alpha = 0.17) +
  scale_fill_manual(values = season_pal, limits = names(season_pal)) +
  geom_hline(yintercept = 0, color = "black", linewidth = 0.3)+
  geom_line(data = daily_preds_plot,
            aes(jdate, rate_per_day, group = SiteID),
            linewidth = 0.8, color = "black") +
  geom_point(data = INx,
             aes(jdate, slope, color = Season_IN),
             alpha = 0.97, size = 1.8) +
  scale_color_manual(values = season_pal, limits = names(season_pal)) +
  scale_x_date(date_breaks = "1 month",
               labels = ~ substr(as.character(lubridate::month(., label=TRUE, abbr=TRUE)),1,1)) +
  labs(title = title_ndz("Quarterly (flat) vs J-day (LOESS) + raw points"),
       x = "Month",
       y = expression(paste("Slope (", Delta,"DIC ", mu,"mol kg"^{-1}, " km"^{-2}, " per day)"))) +
  facet_wrap(~ SiteID, scales = "free_y") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        legend.title = element_blank())
# p_overlay

# 
# p_overlay_int <- p_overlay +
#   # shaded seasonal signed area under LOESS curve
#   geom_ribbon(
#     data = loess_shade,
#     aes(x = jdate, ymin = ymin, ymax = ymax,
#         fill = Season, group = interaction(SiteID, Season)),
#     alpha = 0.15, inherit.aes = FALSE
#   ) +
#     geom_line(
#     data = lab_jday_err,
#     aes(x = x_mid, y = y_low, group = interaction(SiteID, Season)),
#     linetype = "dashed", color = "grey40", inherit.aes = FALSE
#   ) +
#   geom_line(
#     data = lab_jday_err,
#     aes(x = x_mid, y = y_high, group = interaction(SiteID, Season)),
#     linetype = "dashed", color = "grey40", inherit.aes = FALSE
#   )+
#   # LOESS seasonal integrals (bold)
#   geom_text(
#     data = lab_jday,
#     aes(x = x_mid, y = y_lab, label = lab, color = Season),
#     size = 3.1, fontface = "bold", show.legend = FALSE
#   ) +
#   # Quarterly seasonal totals (plain)
#   geom_text(
#     data = lab_quarterly,
#     aes(x = x_mid, y = y_lab_q, label = lab_q, color = Season),
#     size = 2.8, fontface = "plain", alpha = 0.9, show.legend = FALSE
#   ) +
#   guides(fill = guide_legend(override.aes = list(alpha = 0.35))) +
  # labs(subtitle = "Shaded ribbon = signed seasonal area under LOESS curve.\nBold = LOESS seasonal integral; plain = Quarterly seasonal total.")

##this won't work with my version of ggplot (for the text label)
# p_overlay_int <- ggplot(IN, aes(x = jdate, y = slope)) +
#   # points
#   geom_point(aes(color = Season), alpha = 0.4) +
#   geom_hline(yintercept = 0, color = "black", linewidth = 0.3) +
#   # smooth LOESS curve
#   geom_smooth(aes(color = Sub_region, fill = Sub_region),
#               method = "loess", se = TRUE, span = 0.3, alpha = 0.1) +
# 
#   # labels from both methods
#   geom_text(data = lab_quarterly %>%
#               mutate(Method = "Quarterly"),
#             aes(x = x_mid, y = y_lab, label = lab, fontface = Method),
#             inherit.aes = FALSE) +
#   geom_text(data = lab_jday %>%
#               mutate(Method = "LOESS"),
#             aes(x = x_mid, y = y_lab, label = lab, fontface = Method),
#             inherit.aes = FALSE) +
# 
#   scale_fontface_manual(
#     values = c("Quarterly" = "plain",
#                "LOESS"     = "bold"),
#     name   = "Integration method"
#   ) +
# 
#   scale_color_season() +
#   scale_fill_subregion() +
#   scale_x_date(date_breaks = "1 month",
#                labels = function(x) substr(as.character(lubridate::month(x, label = TRUE, abbr = TRUE)),1,1)) +
#   labs(title = "Slope of ΔDIC vs Julian day",
#        x = "Month", y = expression(paste("Slope (", Delta, "DIC)")))

# p_overlay_int


# p_overlay_ci <- p_overlay +
#   geom_segment(
#     data = lab_jday_err,
#     aes(x = x_start, xend = x_end, y = y_low,  yend = y_low,  color = Season),
#     linetype = "dashed", linewidth = 0.5, inherit.aes = FALSE, alpha = 0.85
#   ) +
#   geom_segment(
#     data = lab_jday_err,
#     aes(x = x_start, xend = x_end, y = y_high, yend = y_high, color = Season),
#     linetype = "dashed", linewidth = 0.5, inherit.aes = FALSE, alpha = 0.85
#   ) +
#   geom_text(
#     data = lab_jday,
#     aes(x = x_mid, y = y_lab, label = lab, color = Season),
#     size = 3.1, fontface = "bold", show.legend = FALSE
#   )
# p_overlay_ci
# 
# 
# p_overlay_ci2 <- p_overlay +
#   geom_line(
#     data = ci_all,
#     aes(jdate, lwr, group = SiteID),
#     linetype = "dashed", linewidth = 0.5, color = "black"
#   ) +
#   geom_line(
#     data = ci_all,
#     aes(jdate, upr, group = SiteID),
#     linetype = "dashed", linewidth = 0.5, color = "black"
#   )
# 
# p_overlay_ci2

# Start from your existing base
p_overlay_ci_both <- p_overlay +
  # --- Seasonal integrals: dashed CI bands across each season window ---
  geom_segment(
    data = lab_jday_err,
    aes(x = x_start, xend = x_end, y = y_low,  yend = y_low,  color = Season),
    linetype = "dashed", linewidth = 0.5, inherit.aes = FALSE, alpha = 0.85
  ) +
  geom_segment(
    data = lab_jday_err,
    aes(x = x_start, xend = x_end, y = y_high, yend = y_high, color = Season),
    linetype = "dashed", linewidth = 0.5, inherit.aes = FALSE, alpha = 0.85
  ) +
  # labels for seasonal integrals (LOESS totals)
  geom_text(
    data = lab_jday,
    aes(x = x_mid, y = y_lab, label = lab, color = Season),
    size = 3.1, fontface = "bold", show.legend = FALSE, inherit.aes = FALSE
  ) +
  # --- LOESS curve CI: dashed lines around the solid LOESS line ---
  geom_line(
    data = ci_all,
    aes(jdate, lwr, group = SiteID),
    linetype = "dashed", linewidth = 0.5, color = "black", inherit.aes = FALSE
  ) +
  geom_line(
    data = ci_all,
    aes(jdate, upr, group = SiteID),
    linetype = "dashed", linewidth = 0.5, color = "black", inherit.aes = FALSE
  )

# p_overlay_ci_both



# Find per-site panel range
site_ranges <- INx %>%
  group_by(SiteID) %>%
  summarise(
    y_top = max(slope, na.rm = TRUE),
    y_bot = min(slope, na.rm = TRUE),
    .groups = "drop"
  )

# 1. Get per-site y_max
site_ranges <- INx %>%
  group_by(SiteID) %>%
  summarise(y_max = max(slope, na.rm = TRUE), .groups = "drop") %>%
  mutate(
    y_lab_bold = y_max * 1.25,   # 25% above
    y_lab_plain = y_max * 1.15   # slightly below
  )

# 2. Join onto your label data
lab_jday_top <- lab_jday %>%
  left_join(site_ranges, by = "SiteID") %>%
  mutate(y_lab = y_lab_bold)   # pin bold labels

lab_quarterly_top <- lab_quarterly %>%
  left_join(site_ranges, by = "SiteID") %>%
  mutate(y_lab = y_lab_plain)  # pin plain labels


# assumes you’ve already built:
# p_overlay, loess_shade, lab_jday_err (with x_start/x_end), lab_jday, lab_quarterly, ci_all

p_overlay_all <- p_overlay +
  # 1) signed seasonal area under LOESS curve (put early so it sits behind lines/labels)
  geom_ribbon(
    data = loess_shade,
    aes(x = jdate, ymin = ymin, ymax = ymax,
        fill = Season, group = interaction(SiteID, Season)),
    alpha = 0.17, inherit.aes = FALSE
  ) +
  # 2) seasonal integral CIs (dashed bands across each season window)
  geom_segment(
    data = lab_jday_err,
    aes(x = x_start, xend = x_end, y = y_low,  yend = y_low,  color = Season),
    linetype = "dashed", linewidth = 0.5, inherit.aes = FALSE, alpha = 0.85
  ) +
  geom_segment(
    data = lab_jday_err,
    aes(x = x_start, xend = x_end, y = y_high, yend = y_high, color = Season),
    linetype = "dashed", linewidth = 0.5, inherit.aes = FALSE, alpha = 0.85
  ) +
  # 3) LOESS curve CIs (dashed lines around solid LOESS line)
  geom_line(
    data = ci_all,
    aes(jdate, lwr, group = SiteID),
    linetype = "dashed", linewidth = 0.5, color = "black", inherit.aes = FALSE
  ) +
  geom_line(
    data = ci_all,
    aes(jdate, upr, group = SiteID),
    linetype = "dashed", linewidth = 0.5, color = "black", inherit.aes = FALSE
  ) +
  # 4) seasonal total labels
  # geom_text(
  #   data = lab_jday,
  #   aes(x = x_mid, y = y_lab, label = lab, color = Season),
  #   size = 3.1, fontface = "bold", show.legend = FALSE, inherit.aes = FALSE
  # ) +
  # geom_text(
  #   data = lab_quarterly,
  #   aes(x = x_mid, y = y_lab_q, label = lab_q, color = Season),
  #   size = 2.8, fontface = "plain", alpha = 0.9, show.legend = FALSE, inherit.aes = FALSE
  # ) +
  guides(fill = guide_legend(override.aes = list(alpha = 0.35))) +
  labs(
    subtitle = paste(
      "Shaded = signed seasonal area under LOESS.",
      "Dashed colored segments = seasonal integral 95% CI.",
      "Dashed black lines = LOESS fit 95% CI.",
      # "Bold = LOESS seasonal total; Plain = Quarterly seasonal total.",
      sep = "\n"
    )
  )

p_overlay_all

# p_overlay_all2 <- p_overlay_all +
#   # replace plain labels with top-aligned
#   geom_text(
#     data = lab_quarterly2,
#     aes(x = x_mid, y = y_lab_fixed, label = lab_q, color = Season),
#     size = 2.8, fontface = "plain", alpha = 0.9, show.legend = FALSE,
#     inherit.aes = FALSE
#   ) +
#   # replace bold labels with bottom-aligned
#   geom_text(
#     data = lab_jday2,
#     aes(x = x_mid, y = y_lab_fixed, label = lab, color = Season),
#     size = 3.1, fontface = "bold", show.legend = FALSE,
#     inherit.aes = FALSE)
#   # )+
#   # scale_y_continuous(
#   #   expand = expansion(mult = c(0.1, 0.25))  # 10% below, 25% above
#   # )
# p_overlay_all2


# p_overlay_all_top <- p_overlay_all +
#   geom_text(
#     data = lab_jday_top,
#     aes(x = x_mid, y = y_lab, label = lab, color = Season),
#     size = 3.1, fontface = "bold", show.legend = FALSE, inherit.aes = FALSE
#   ) +
#   geom_text(
#     data = lab_quarterly_top,
#     aes(x = x_mid, y = y_lab, label = lab_q, color = Season),
#     size = 2.8, fontface = "plain", alpha = 0.9, show.legend = FALSE, inherit.aes = FALSE
#   ) +
#   scale_y_continuous(expand = expansion(mult = c(0.05, 0.35)))  # add headroom
# p_overlay_all_top



# season_check <- IN %>%
#   transmute(Sub_region, SiteID, jday = as.integer(jday),
#             Season_IN = factor(Season, levels = season_levels)) %>%
#   left_join(season_calendar, by = "jday") %>%   # adds Season_cal
#   mutate(mismatch = !is.na(Season_IN) & !is.na(Season_cal) & Season_IN != Season_cal)
# 
# # Tiny summary you can print
# mismatch_summary <- season_check %>%
#   count(Season_IN, Season_cal, mismatch) %>%
#   arrange(desc(n))
# mismatch_summary
# 
# # Optional: visualize where mismatches occur
# ggplot(season_check, aes(jday, SiteID, color = mismatch)) +
#   geom_point(alpha = 0.6) +
#   scale_color_manual(values = c(`TRUE` = "red", `FALSE` = "grey40")) +
#   labs(title = "Season label mismatches (IN vs canonical calendar)",
#        x = "Julian day", y = "SiteID") +
#   theme_bw()
# #nothing mismatched now. 

# Prep LOESS on date scale
daily_preds_plot <- daily_preds %>%
  mutate(jdate = as.Date("2024-01-01") + (jday - 1),
         rate_per_day = pred_per_day)
# 
# p_overlay <- ggplot() +
#   # Quarterly "flat" daily rates (background) by canonical season
#   geom_col(
#     data = quarterly_daily,
#     aes(x = jdate, y = rate_per_day, fill = Season_cal),
#     width = 1, alpha = 0.25, show.legend = TRUE
#   ) +
#   scale_fill_manual(values = season_pal, limits = names(season_pal)) +
# 
#   # J-day LOESS predictions (smooth line)
#   geom_line(
#     data = daily_preds_plot,
#     aes(x = jdate, y = rate_per_day, group = SiteID),
#     linewidth = 0.7, color = "black"
#   ) +
#   geom_point(
#   data = season_check %>% filter(mismatch) %>%
#            mutate(jdate = as.Date("2024-01-01") + (jday - 1)) %>%
#            left_join(IN %>% select(SiteID, jday, slope), by = c("SiteID","jday")),
#   aes(jdate, slope),
#   shape = 21, stroke = 0.8, fill = NA, color = "red", size = 2.4
# )+
#   # Raw points colored by IN's own Season labels (independent check)
#   geom_point(
#     data = IN,
#     aes(x = jdate, y = slope, color = Season),
#     alpha = 0.5, size = 1.8
#   ) +
#   scale_color_manual(values = season_pal, limits = names(season_pal)) +
# 
#   scale_x_date(date_breaks = "1 month",
#                labels = function(x) substr(as.character(lubridate::month(x, label = TRUE, abbr = TRUE)), 1, 1)) +
#   labs(
#     title = title_ndz("Quarterly (flat) vs J-day (LOESS) + raw points"),
#     x = "Month",
#     y = expression(paste("Slope (", Delta,"DIC ", mu,"mol kg"^{-1}, " km"^{-2}, " per day)"))
#   ) +
#   facet_wrap(~ SiteID, scales = "free_y") +
#   theme_bw() +
#   theme(axis.text.x = element_text(angle = 45, hjust = 1),
#         legend.title = element_blank())



# # Per-day rate for each (Sub_region, SiteID, Season) from the quarterly method.
# # mddDIC is the median daily slope for the season, so it's already "per day".
# SSm_perday <- SSm %>%
#   transmute(Sub_region, SiteID, Season, rate_per_day = mddDIC)
# 
# # Calendar for all jdays with a Date for plotting
# calendar_base <- season_calendar %>%
#   transmute(
#     jday  = as.integer(jday),
#     Season = factor(Season, levels = season_levels),
#     jdate = as.Date("2024-01-01") + (jday - 1)
#   )
# 
# # Replicate that per-day seasonal rate across *every jday in that season*
# # for each site & subregion. This is many-to-many on purpose.
# quarterly_daily <- calendar_base %>%
#   inner_join(SSm_perday, by = "Season", relationship = "many-to-many")
# 
# # --- 2) Prep the J-day LOESS predictions for plotting on date scale ---
# daily_preds_plot <- daily_preds %>%
#   mutate(
#     jdate = as.Date("2024-01-01") + (jday - 1),
#     # pred_per_day is already "per day" from the helper
#     rate_per_day = pred_per_day
#   )
# 
# # --- 3) Build the overlay plot --------------------------------------------
# # Layer order: seasonal "flat" background (by Season) + LOESS line (J-day) + raw points
# # Facet by SiteID (free_y optional; set to "free_y" if sites have very different ranges)
# 
# p_overlay <- ggplot() +
#   # Seasonal "flat" daily rates (background); alpha keeps bars subtle
#   geom_col(
#     data = quarterly_daily,
#     aes(x = jdate, y = rate_per_day, fill = Season),
#     width = 1, alpha = 0.25, show.legend = TRUE
#   ) +
#   scale_fill_season() +
# 
#   # J-day LOESS predictions (smooth daily variability)
#   geom_line(
#     data = daily_preds_plot,
#     aes(x = jdate, y = rate_per_day, group = 1),
#     linewidth = 0.8, color = "black"
#   ) +
# 
#   # Raw sample points used in the LOESS (helps show data support)
#   geom_point(
#     data = IN,
#     aes(x = jdate, y = slope, color = Season),
#     alpha = 0.5, size = 1.8
#   ) +
#   scale_color_season() +
# 
#   # X axis = months as first letters
#   scale_x_date(
#     date_breaks = "1 month",
#     labels = function(x) substr(as.character(lubridate::month(x, label = TRUE, abbr = TRUE)), 1, 1)
#   ) +
#   labs(
#     title = title_ndz("Quarterly (flat) vs J-day (LOESS) + raw points"),
#     x = "Month",
#     y = expression(paste("Slope (", Delta,"DIC ", mu,"mol kg"^{-1}, " km"^{-2}, " per day)"))
#   ) +
#   facet_wrap(~ SiteID, scales = "free_y") +
#   theme_bw() +
#   theme(
#     axis.text.x = element_text(angle = 45, hjust = 1),
#     legend.title = element_blank()
#   )
# 

##with labels: 
# collect all y's that appear in the plot
y_all <- c(
  INx$slope,
  daily_preds_plot$rate_per_day,
  loess_shade$ymin, loess_shade$ymax,
  ci_all$lwr, ci_all$upr,
  lab_jday_err$y_low, lab_jday_err$y_high
)
y_all <- y_all[is.finite(y_all)]

# global limits + padding
pad_top <- 0.18   # leave headroom for top labels
pad_bot <- 0.12   # leave footroom for bottom labels
y_min   <- min(y_all, na.rm = TRUE)
y_max   <- max(y_all, na.rm = TRUE)
yrange  <- y_max - y_min
ylims   <- c(y_min - pad_bot*yrange, y_max + pad_top*yrange)

p_overlay_all_fixed <- p_overlay_all +
  scale_y_continuous(limits = ylims, expand = expansion(mult = c(0, 0))) +
  facet_wrap(~ SiteID, scales = "fixed")  # or just remove 'scales' entirely

# total range
yrange <- ylims[2] - ylims[1]

# put both rows near the top
y_row1 <- ylims[2] - 0.05 * yrange   # Quarterly (plain) higher
y_row2 <- ylims[2] - 0.10 * yrange   # LOESS (bold) just below

lab_quarterly_fixed <- lab_quarterly %>%
  mutate(y_fixed = y_row1)

lab_jday_fixed <- lab_jday %>%
  mutate(y_fixed = y_row2)

p_overlay_all_fixed <- p_overlay_all_fixed +
  geom_text(
    data = lab_jday_fixed,      # LOESS (bold) slightly lower
    aes(x = x_mid, y = y_fixed, label = lab, color = Season),
    size = 3.1, fontface = "bold", show.legend = FALSE, inherit.aes = FALSE
  ) +
  geom_text(
    data = lab_quarterly_fixed, # Quarterly (plain) slightly higher
    aes(x = x_mid, y = y_fixed, label = lab_q, color = Season),
    size = 2.8, fontface = "plain", alpha = 0.9, show.legend = FALSE, inherit.aes = FALSE
  )


p_overlay_all_fixed

# --- Hectares required to offset OA (Jday/LOESS version) ---


# Convert per ha → per m2 and compute area to offset +5.35 µmol/kg/yr
#    (Positive required area when annual_DIC_m2 is negative—i.e., drawdown.)
nAnn_jday <- annual_jday %>%
  mutate(
    annual_DIC_m2 = annual_DIC_ha / 1e4,
    SGm2_offOA    = -5.35 /  annual_DIC_m2,
    SG_HA_offOA   = SGm2_offOA / 1e4              # hectares required
  ) %>%
  mutate(                                       # clean divide-by-zero/infs
    SGm2_offOA  = if_else(is.finite(SGm2_offOA),  SGm2_offOA,  NA_real_),
    SG_HA_offOA = if_else(is.finite(SG_HA_offOA), SG_HA_offOA, NA_real_)
  ) %>%
  mutate(                                       # optional: stable x order
    SiteID = fct_inorder(as.factor(SiteID))
  )


# ---- Align x for j0 (annual_jday) and j1 (nAnn_jday) ----
COMMON_X_J <- build_common_x_levels(
  list(annual_jday, nAnn_jday),
  x_col = "SiteID",
  canonical = site_levels
)
annual_jday <- apply_x_levels(annual_jday, "SiteID", COMMON_X_J)
nAnn_jday   <- apply_x_levels(nAnn_jday,   "SiteID", COMMON_X_J)

# (Optional) Diagnostic scatter: required ha vs annual ΔDIC per m²
j2 <- ggplot(nAnn_jday, aes(x = annual_DIC_m2, y = SG_HA_offOA, color = Sub_region)) +
  geom_hline(yintercept = 0, linewidth = 0.3) +
  geom_vline(xintercept = 0, linewidth = 0.3, linetype = "dashed") +
  geom_point(size = 3, alpha = 0.8, na.rm = TRUE) +
  scale_color_subregion() +                          # NEW
  scale_x_continuous(labels = scales::scientific) +
  scale_y_continuous(labels = scales::comma) +
  labs(
    title = "Diagnostic (Jday): Required hectares vs. annual ΔDIC per m²",
    x = expression(paste("Annual ", Delta, "DIC per m"^2)),
    y = "Required seagrass area (ha)"
  ) +
  theme_bw()
j2

j0 <- ggplot(annual_jday, aes(SiteID, annual_DIC_ha, fill = Sub_region)) +
  geom_hline(yintercept = 0, linewidth = 0.3) +
  geom_col(width = 0.7) +                            # SAME width in both
  scale_fill_subregion() +
  scale_x_locked(COMMON_X_J, expand_add = 0.6, drop = FALSE) +  # lock x
  labs(title = title_ndz("Annual (jday) ΔDIC per ha"),
       x = "SiteID",
       y = expression(paste(Delta, "DIC (", mu, "mol kg"^{-1}, ") per ha"))) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
j0 

j1 <- ggplot(nAnn_jday, aes(SiteID, SG_HA_offOA, fill = Sub_region)) +
  geom_col(width = 0.7) +                            # SAME width in both
  scale_fill_subregion() +
  scale_x_locked(COMMON_X_J, expand_add = 0.6, drop = FALSE) +  # lock x
  scale_y_break(c(40, 190)) +
  geom_text(aes(label = scales::comma(SG_HA_offOA, accuracy = 0.1)),
            vjust = 1.2, color = "white", size = 3) +
  labs(title = title_ndz("Seagrass area (hectares) required to offset OA"),
       x = "SiteID", y = "Seagrass Area (hectares)") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
j1

# --- Quarterly (from your SSm/nAnn workflow) ---
quarterly_season <- SSm %>%
  mutate(DIC_season_ha = mddDIC91 / 100)

annual_quarterly <- quarterly_season %>%
  group_by(SiteID) %>%
  summarise(annual_ha = sum(DIC_season_ha, na.rm = TRUE), .groups = "drop")

# site_lvls_plot <- intersect(site_levels, unique(as.character(quarterly_season$SiteID)))
# quarterly_season <- quarterly_season %>% mutate(SiteID = factor(SiteID, levels = site_lvls_plot))
# seasonal_jday    <- seasonal_jday    %>% mutate(SiteID = factor(SiteID, levels = site_lvls_plot))

# ---- Align x for p_quarterly and p_jday ----
COMMON_X_QJ <- build_common_x_levels(
  list(quarterly_season, seasonal_jday),
  x_col = "SiteID",
  canonical = site_levels
)

quarterly_season <- apply_x_levels(quarterly_season, "SiteID", COMMON_X_QJ)
seasonal_jday    <- apply_x_levels(seasonal_jday,    "SiteID", COMMON_X_QJ)


p_quarterly <- ggplot(quarterly_season,
                      aes(SiteID, DIC_season_ha, fill = Season)) +
  geom_col(position = "stack") +
  scale_fill_season() +
  scale_x_locked(COMMON_X_QJ) + 
  labs(title = "Quarterly method (Season × 91.3 days)",
       y = expression(paste(Delta, "DIC per ha"))) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# theme(axis.text.x = element_blank(),
  #       axis.ticks.x = element_blank(),
  #       axis.title.x = element_blank())
# p_quarterly 

# --- Jday LOESS (from daily_preds) ---
p_jday <- ggplot(seasonal_jday,
                 aes(x = SiteID, y = DIC_season_ha, fill = Season)) +
  geom_col(position = "stack") +
  scale_fill_season() +                               # NEW
  labs(title = "Jday method (daily to seasonal sum)",
       y = expression(paste(Delta, "DIC per ha"))) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# p_jday

# --- Combine with patchwork ---
p_quarterly | p_jday

# Compute per-season per-ha for quarterly
quarterly_season <- SSm %>%
  mutate(DIC_season_ha = mddDIC91 / 100)

# Compute per-season per-ha for jday (you already have seasonal_jday)
# seasonal_jday: cols SiteID, Season, DIC_season_ha

# Make sure Season is a factor in the same order for both
quarterly_season <- quarterly_season %>%
  mutate(Season = factor(Season, levels = c("Winter","Spring","Summer","Fall")))
seasonal_jday <- seasonal_jday %>%
  mutate(Season = factor(Season, levels = c("Winter","Spring","Summer","Fall")))

# Common y-limit across both plots (optional but helpful for visual comparison)
y_max <- max(
  quarterly_season$DIC_season_ha,
  seasonal_jday$DIC_season_ha,
  na.rm = TRUE
) * 1.05
y_lim <- c(0, y_max)

# Grouped (side-by-side) bars 
# 
# p_quarterly <- ggplot(quarterly_season,
#                       aes(x = SiteID, y = DIC_season_ha, fill = Season)) +
#   geom_col(position = "dodge") +
#   labs(title = "Quarterly method (Season × 91.3 days)",
#        y = expression(paste(Delta, "DIC per ha"))) +
#   theme_bw() +
#   theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
#   scale_y_continuous(expand = expansion(mult = c(0.05, 0.05)))
# 
# p_jday <- ggplot(seasonal_jday,
#                       aes(x = SiteID, y = DIC_season_ha, fill = Season)) +
#   geom_col(position = "dodge") +
#   labs(title = "Jday LOESS method (daily to season sum)",
#        y = expression(paste(Delta, "DIC per ha"))) +
#   theme_bw() +
#   theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
#   scale_y_continuous(expand = expansion(mult = c(0.05, 0.05)))
# 
# 
# # Side-by-side comparison
# (p_quarterly | p_jday) + patchwork::plot_layout(guides = "collect") &
#   theme(legend.position = "top")


# Compute global y-limits across both methods
y_range <- range(
  c(quarterly_season$DIC_season_ha, seasonal_jday$DIC_season_ha),
  na.rm = TRUE
)

# Quarterly plot
p_quarterly <- ggplot(quarterly_season,
                      aes(x = SiteID, y = DIC_season_ha, fill = Season)) +
  geom_col(position = "dodge") +
  scale_fill_season()+
  geom_hline(yintercept = 0, linewidth = 0.3, color = "black") +
  labs(title = "Quarterly method (Season × 91.3 days)",
       y = expression(paste(Delta, "DIC per ha"))) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(limits = y_range,
                     expand = expansion(mult = c(0.05, 0.05)))

# Jday plot
p_jday <- ggplot(seasonal_jday,
                 aes(x = SiteID, y = DIC_season_ha, fill = Season)) +
  geom_col(position = "dodge") +
  scale_fill_season()+
  geom_hline(yintercept = 0, linewidth = 0.3, color = "black") +
  labs(title = "Jday method (daily to season sum)",
       y = expression(paste(Delta, "DIC per ha"))) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(limits = y_range,
                     expand = expansion(mult = c(0.05, 0.05)))

# Side-by-side comparison with shared legend
two =(p_quarterly | p_jday) + patchwork::plot_layout(guides = "collect") &
  theme(legend.position = "top")
two

## what about hectares needed comparison? 
# ---- session info ----
sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] patchwork_1.2.0 ggbreak_0.1.6   scales_1.3.0    lubridate_1.9.3
##  [5] forcats_1.0.0   stringr_1.5.1   dplyr_1.1.4     purrr_1.0.2    
##  [9] readr_2.1.5     tidyr_1.3.1     tibble_3.2.1    ggplot2_3.5.1  
## [13] tidyverse_2.0.0
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.2.1   xfun_0.52          bslib_0.8.0        lattice_0.22-6    
##  [5] splines_4.2.1      ggfun_0.2.0        colorspace_2.1-1   vctrs_0.6.5       
##  [9] generics_0.1.3     htmltools_0.5.8.1  mgcv_1.8-40        yaml_2.3.10       
## [13] utf8_1.2.4         gridGraphics_0.5-1 rlang_1.1.4        jquerylib_0.1.4   
## [17] pillar_1.9.0       glue_1.8.0         withr_3.0.1        rappdirs_0.3.3    
## [21] bit64_4.0.5        lifecycle_1.0.4    munsell_0.5.1      gtable_0.3.5      
## [25] evaluate_0.24.0    labeling_0.4.3     knitr_1.50         tzdb_0.4.0        
## [29] fastmap_1.2.0      parallel_4.2.1     fansi_1.0.6        cachem_1.1.0      
## [33] vroom_1.6.5        jsonlite_1.8.8     farver_2.1.2       bit_4.0.5         
## [37] fs_1.6.4           hms_1.1.3          digest_0.6.37      aplot_0.2.8       
## [41] stringi_1.8.4      grid_4.2.1         cli_3.6.3          tools_4.2.1       
## [45] yulab.utils_0.2.1  magrittr_2.0.3     sass_0.4.9         crayon_1.5.3      
## [49] pkgconfig_2.0.3    Matrix_1.6-4       ggplotify_0.1.2    timechange_0.3.0  
## [53] rmarkdown_2.29     rstudioapi_0.16.0  R6_2.5.1           nlme_3.1-157      
## [57] compiler_4.2.1