Downloading libraries for project:

library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.5.2

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(lubridate)
library(patchwork)

## Warning: package 'patchwork' was built under R version 4.5.2

library(ggplot2)
library(readr)
library(cowplot)

## Warning: package 'cowplot' was built under R version 4.5.2

## 
## Attaching package: 'cowplot'
## 
## The following object is masked from 'package:patchwork':
## 
##     align_plots
## 
## The following object is masked from 'package:lubridate':
## 
##     stamp

library(rlang)

## 
## Attaching package: 'rlang'
## 
## The following objects are masked from 'package:purrr':
## 
##     %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
##     flatten_raw, invoke, splice

library(knitr)
library(kableExtra)

## Warning: package 'kableExtra' was built under R version 4.5.2

## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows

library(nlme)

## 
## Attaching package: 'nlme'
## 
## The following object is masked from 'package:dplyr':
## 
##     collapse

library(purrr)
library(emmeans)

## Warning: package 'emmeans' was built under R version 4.5.2

## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'

Importing data

Functions for creating figures:

# importing data sets
Flux_Data_With_Covars <- read_csv("C:/Users/KAUFMADA/Documents/R_Files/windrow-fluxes-co2-model-improvements/windrow-fluxes-co2-model-improvements/data_clean/Flux_Data_With_Covars.csv")

## Rows: 21090 Columns: 82
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (5): Pile, Chamber_Corrected, Turning_Status, Date, DATE_TIME
## dbl (77): Chamber_Elevation, DOY, air_temperature, RH, VPD, Tdew, wind_speed...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Cleaning data: Removing 9999 and -9999 which is NAN Removing unrealistic pile moisture and temperature data - 0-99 c range for pile temp - volumetric moisture 0.01 - 0.90

Creating daily observations (daily_pile_doy) where values are averaged by day for more interpretable figures

Creating weekly observation (summarise_weekly_mean_se) where values are averaged by week

# function used to remove NA values which LICOR uses -9999 for:

clean_numeric <- function(x,
                          min_val = -Inf,
                          max_val = Inf,
                          bad_values = c(-9999, -9999, 9999, 99999)) {
  x <- as.numeric(x)
  x[x %in% bad_values] <- NA_real_
  x[x < min_val | x > max_val] <- NA_real_
  x
}

# function that calculates the SE of a function: 

se <- function(x) sd(x, na.rm = TRUE) / sqrt(sum(!is.na(x)))

Cleaned_Data <- Flux_Data_With_Covars %>%
  mutate(
    # DOY has decimals, so collapse to whole day
    DOY = floor(DOY),

    pile_temp   = clean_numeric(TS_1.initial_value,  min_val = 0, max_val = 99),
    pile_moist  = clean_numeric(SWC_1.initial_value, min_val = 0.01, max_val = 0.90),
    BulkDensity = clean_numeric(BulkDensity,         min_val = 0.01, max_val = 1)
  )

daily_pile_doy <- Cleaned_Data %>%
  group_by(Pile, DOY) %>%
  summarise(
    n_obs           = sum(!is.na(pile_temp) | !is.na(pile_moist)),
    pile_temp_mean  = mean(pile_temp,  na.rm = TRUE),
    pile_temp_se    = se(pile_temp),
    pile_moist_mean = mean(pile_moist, na.rm = TRUE),
    pile_moist_se   = se(pile_moist),
    .groups = "drop"
  )


# making weekly figures to reduce noise for final paper figures: 

summarise_weekly_mean_se <- function(df,
                                     doy,
                                     value,
                                     group,
                                     min_n = 2) {

  doy   <- enquo(doy)
  value <- enquo(value)
  group <- enquo(group)

  df %>%
    filter(!is.na(!!value), !is.na(!!doy), !is.na(!!group)) %>%
    mutate(
      week = floor((!!doy - 1) / 7) + 1
    ) %>%
    group_by(!!group, week) %>%
    summarise(
      DOY  = mean(!!doy, na.rm = TRUE),   # x-position for plotting
      mean = mean(!!value, na.rm = TRUE),
      sd   = sd(!!value, na.rm = TRUE),
      n    = sum(!is.na(!!value)),
      se   = sd / sqrt(n),
      .groups = "drop"
    ) %>%
    filter(n >= min_n)
}

weekly_moist <- summarise_weekly_mean_se(
  df    = Cleaned_Data,
  doy   = DOY,
  value = pile_moist,
  group = Pile,
  min_n = 2
)
weekly_temp <- summarise_weekly_mean_se(
  df    = Cleaned_Data,
  doy   = DOY,
  value = pile_temp,
  group = Pile,
  min_n = 2
)

Creating a function to Plot Temperature and Moisture

plot_points_err <- function(df,
                            x, y_mean, y_se,
                            group,
                            cols = NULL,
                            shapes = NULL,
                            point_size = 2.0,
                            point_alpha = 0.85,
                            err_alpha = 0.6,
                            err_lwd = 0.4,
                            err_width = 0,
                            base_size = 12,
                            legend_position = "top",
                            drop_na = TRUE,
                            x_lab = NULL,
                            y_lab = NULL,
                            legend_title = NULL,
                            vline = NULL,
                            vlines_red = NULL,
                            vlines_black = NULL,
                            vline_lwd = 0.4,
                            vline_alpha = 0.8,
                            vline_linetype = "dashed",
                            vline_red_linetype   = "solid",
                            vline_black_linetype = "dashed",
                            vline_red_colour     = "#9b1c31",
                            vline_black_colour   = "#000000"){

  # capture column names
  x      <- enquo(x)
  y_mean <- enquo(y_mean)
  y_se   <- enquo(y_se)
  group  <- enquo(group)

  # optionally drop NA rows for required columns
  if (drop_na) {
    df <- df[complete.cases(
      df[, c(as_name(x), as_name(y_mean), as_name(y_se), as_name(group))]
    ), ]
  }

  p <- ggplot(df, aes(x = !!x, y = !!y_mean)) +

    # error bars first (so points draw on top)
    geom_errorbar(
      aes(ymin = (!!y_mean) - (!!y_se),
          ymax = (!!y_mean) + (!!y_se),
          colour = !!group),
      width = err_width,
      alpha = err_alpha,
      linewidth = err_lwd
    ) 
  
  if (!is.null(vlines_red)) {
  p <- p + geom_vline(
    xintercept = vlines_red,
    colour     = vline_red_colour,
    linetype   = vline_red_linetype,
    linewidth  = vline_lwd,
    alpha      = vline_alpha
  )
}

if (!is.null(vlines_black)) {
  p <- p + geom_vline(
    xintercept = vlines_black,
    colour     = vline_black_colour,
    linetype   = vline_black_linetype,
    linewidth  = vline_lwd,
    alpha      = vline_alpha
  )
  }
  p <- p +

    # points (no lines)
    geom_point(
      aes(colour = !!group, shape = !!group),
      size = point_size,
      alpha = point_alpha
    ) +

    theme_bw(base_size = base_size) +
    theme(
      legend.position = legend_position,
      panel.grid.minor = element_blank()
    )

  # manual scales if provided (keeps consistent styling across plots)
  if (!is.null(cols)) {
    p <- p + scale_colour_manual(values = cols)
  }
  if (!is.null(shapes)) {
    p <- p + scale_shape_manual(values = shapes)
  }
# ── LABELS SECTION ─────────────────────────────
  if (!is.null(x_lab) || !is.null(y_lab) || !is.null(legend_title)) {
    p <- p + labs(
      x = x_lab,
      y = y_lab,
      colour = legend_title,
      shape  = legend_title
    )
  }
  return(p)
}

Average Temperature and Moisture Plots with SE bars for each pile done by DOY and Week

#turning data to add to plots if needed
turns_both_red <- c(159, 186, 208, 234, 262, 290)
turns_control_black <- c(173, 191, 222, 249, 276)
turns_both_red_week <- c(159, 186, 208, 234, 262, 290)/7
turns_control_black_week <- c(173, 191, 222, 249, 276)/7


pile_cols   <- c(C = "#000000", E = "#9b1c31")
pile_shapes <- c(C = 16, E = 17)


fig_moist_doy <- plot_points_err(
  daily_pile_doy,
  x = DOY,
  y_mean = pile_moist_mean,
  y_se = pile_moist_se,
  group = Pile,
  cols = pile_cols,
  shapes = pile_shapes,
  point_size = 1.8,
  x_lab = "Day of year",
  y_lab = expression("Volumetric water content (m"^3*" m"^-3*")"),
  legend_title = "Pile",

  vlines_red   = turns_both_red,
  vlines_black = turns_control_black,

  vline_red_linetype   = "solid",
  vline_black_linetype = "dashed"
)

fig_moist_doy

fig_moist_weekly <- plot_points_err(
  weekly_moist,
  x = week,
  y_mean = mean,
  y_se =  se,
  group = Pile,
  cols = pile_cols,
  shapes = pile_shapes,
  point_size = 1.8,
  x_lab = "Week of year",
  y_lab = expression("Volumetric water content (m"^3*" m"^-3*")"),
  legend_title = "Pile",
  
  vlines_red   = turns_both_red_week,
  vlines_black = turns_control_black_week,

  vline_red_linetype   = "solid",
  vline_black_linetype = "dashed"
)

fig_moist_weekly

fig_temp_doy <- plot_points_err(
  daily_pile_doy,
  x = DOY,
  y_mean =pile_temp_mean,
  y_se =  pile_temp_se,
  group = Pile,
  cols = pile_cols,
  shapes = pile_shapes,
  point_size = 1.8,
  x_lab = "Day of year",
  y_lab = expression("Pile Temperature Degrees Celsius"),
  legend_title = "Pile",
  
  vlines_red   = turns_both_red,
  vlines_black = turns_control_black,

  vline_red_linetype   = "solid",
  vline_black_linetype = "dashed"
)

fig_temp_doy

fig_temp_weekly <- plot_points_err(
  weekly_temp,
  x = week,
  y_mean =mean,
  y_se =  se,
  group = Pile,
  cols = pile_cols,
  shapes = pile_shapes,
  point_size = 1.8,
  x_lab = "Week of year",
  y_lab = expression("Pile Temperature Degrees Celsius"),
  legend_title = "Pile",
  
  vlines_red   = turns_both_red_week,
  vlines_black = turns_control_black_week,

  vline_red_linetype   = "solid",
  vline_black_linetype = "dashed"
)

fig_temp_weekly

Plotting chamber pile temperature all points (by chamber individually or all chambers in one plot) with turning events

library(patchwork)
chamber_cols <- c(
  "C1" = "#E41A1C",  # red
  "C2" = "#377EB8",  # blue
  "C3" = "#4DAF4A",  # green
  "C4" = "#FF7F00",  # orange
  "C5" = "#984EA3",  # purple
  "C6" = "#A65628"   # brown
)

plot_temp <- function(df,
                      x,
                      y,
                      group,
                      cols = NULL,
                      base_size = 11,
                      legend_position = "top",
                      x_lab = NULL,
                      y_lab = "Temperature (°C)",
                      legend_title = NULL,
                      point_size = 0.8,
                      point_alpha = 0.4,
                      vlines_red = NULL,
                      vlines_black = NULL,
                      vline_lwd = 0.4,
                      vline_red_linetype   = "solid",
                      vline_black_linetype = "dashed",
                      vline_red_colour     = "#9b1c31",
                      vline_black_colour   = "#000000") {

  x     <- enquo(x)
  y     <- enquo(y)
  group <- enquo(group)

  p <- ggplot(df, aes(x = !!x, y = !!y,
                      colour = factor(!!group),
                      group  = factor(!!group))) +
    geom_point(size = point_size, alpha = point_alpha, shape = 16) +
    theme_bw(base_size = base_size) +
    theme(
      legend.position  = legend_position,
      panel.grid.minor = element_blank(),
      panel.grid.major = element_line(colour = "grey92", linewidth = 0.3),
      axis.text        = element_text(size = 9),
      axis.title       = element_text(size = 10),
      legend.text      = element_text(size = 8),
      legend.key.size  = unit(0.35, "cm")
    ) +
    labs(x      = x_lab,
         y      = y_lab,
         colour = legend_title)

  if (!is.null(cols)) {
    p <- p + scale_colour_manual(values = cols)
  }

  if (!is.null(vlines_red)) {
    p <- p + geom_vline(xintercept = vlines_red,
                        colour    = vline_red_colour,
                        linetype  = vline_red_linetype,
                        linewidth = vline_lwd,
                        alpha     = 0.8)
  }

  if (!is.null(vlines_black)) {
    p <- p + geom_vline(xintercept = vlines_black,
                        colour    = vline_black_colour,
                        linetype  = vline_black_linetype,
                        linewidth = vline_lwd,
                        alpha     = 0.8)
  }

  return(p)
}


fig_temp_chamber_raw <- plot_temp(
  df           = Cleaned_Data,
  x            = DOY.initial_value,
  y            = pile_temp,
  group        = Chamber_Corrected,
  cols         = chamber_cols,
  y_lab        = expression("Temperature (°C)"),
  x_lab        = "Day of year",
  legend_title = "Chamber",
  vlines_red   = turns_both_red,
  vlines_black = turns_control_black,
  vline_red_linetype   = "solid",
  vline_black_linetype = "dashed"
)

fig_temp_chamber_raw

## Warning: Removed 2478 rows containing missing values or values outside the scale range
## (`geom_point()`).

chamber_plots <- Cleaned_Data %>%
  split(.$Chamber_Corrected) %>%
  imap(~ plot_temp(
    df           = .x,
    x            = DOY.initial_value,
    y            = pile_temp,
    group        = Chamber_Corrected,
    y_lab        = expression("Temperature (°C)"),
    x_lab        = "Day of year",
    legend_title = "Chamber",
    cols         = chamber_cols,
    vlines_red   = turns_both_red,
    vlines_black = turns_control_black
  ) +
    ggtitle(.y) +
    theme(legend.position = "none")
  )

left_col  <- chamber_plots[["C1"]] / chamber_plots[["C2"]] / chamber_plots[["C3"]]
right_col <- chamber_plots[["C4"]] / chamber_plots[["C5"]] / chamber_plots[["C6"]]

fig_temp_individual <- left_col | right_col
fig_temp_individual

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 776 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 266 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 1368 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 62 rows containing missing values or values outside the scale range
## (`geom_point()`).

chamber_plots_early <- Cleaned_Data %>%
#  filter(DOY < 159) %>%
  split(.$Chamber_Corrected) %>%
  imap(~ plot_temp(
    df           = .x,
    x            = DOY.initial_value,
    y            = pile_temp,
    group        = Chamber_Corrected,
    y_lab        = expression("Temperature (°C)"),
    x_lab        = "Day of year",
    legend_title = "Chamber",
    cols         = chamber_cols)
 +
      
    ggtitle(.y) +
    theme(legend.position = "none")
  )

left_col_early  <- chamber_plots_early[["C1"]] / chamber_plots_early[["C2"]] / chamber_plots_early[["C3"]]
right_col_early <- chamber_plots_early[["C4"]] / chamber_plots_early[["C5"]] / chamber_plots_early[["C6"]]

fig_temp_individual_early <- left_col_early | right_col_early
fig_temp_individual_early

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 776 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 266 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 1368 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 62 rows containing missing values or values outside the scale range
## (`geom_point()`).

vlines_red_window   <- turns_both_red[turns_both_red >= 159]
vlines_black_window <- turns_control_black[turns_control_black >= 159]

# C1, C2, C3 - control chambers: both red and black lines
control_plots <- Cleaned_Data %>%
  filter( Chamber_Corrected %in% c("C1", "C2", "C3")) %>%
  split(.$Chamber_Corrected) %>%
  imap(~ plot_temp(
    df           = .x,
    x            = DOY.initial_value,
    y            = pile_temp,
    group        = Chamber_Corrected,
    y_lab        = expression("Temperature (°C)"),
    x_lab        = "Day of year",
    legend_title = "Chamber",
    cols         = chamber_cols,
    vlines_red   = vlines_red_window,
    vlines_black = vlines_black_window,
    vline_red_linetype   = "solid",
    vline_black_linetype = "dashed"
  ) +
    ggtitle(.y) +
    theme(legend.position = "none")
  )

# C4, C5, C6 - experimental chambers: red lines only
exp_plots <- Cleaned_Data %>%
  filter( Chamber_Corrected %in% c("C4", "C5", "C6")) %>%
  split(.$Chamber_Corrected) %>%
  imap(~ plot_temp(
    df           = .x,
    x            = DOY.initial_value,
    y            = pile_temp,
    group        = Chamber_Corrected,
    y_lab        = expression("Temperature (°C)"),
    x_lab        = "Day of year",
    legend_title = "Chamber",
    cols         = chamber_cols,
    vlines_red   = vlines_red_window,
    vline_red_linetype = "solid"
    # no vlines_black
  ) +
    ggtitle(.y) +
    theme(legend.position = "none")
  )

left_col_early_II  <- control_plots[["C1"]] / control_plots[["C2"]] / control_plots[["C3"]]
right_col_early_II <- exp_plots[["C4"]]     / exp_plots[["C5"]]     / exp_plots[["C6"]]

fig_temp_individual_early_II <- left_col_early_II | right_col_early_II
fig_temp_individual_early_II

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 776 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 266 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 1368 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 62 rows containing missing values or values outside the scale range
## (`geom_point()`).

Un averaged moisture by chamber, looking at whole experimental cycle and at early turning events.

library(patchwork)
chamber_cols <- c(
  "C1" = "#E41A1C",  # red
  "C2" = "#377EB8",  # blue
  "C3" = "#4DAF4A",  # green
  "C4" = "#FF7F00",  # orange
  "C5" = "#984EA3",  # purple
  "C6" = "#A65628"   # brown
)

plot_temp <- function(df,
                      x,
                      y,
                      group,
                      cols = NULL,
                      base_size = 11,
                      legend_position = "top",
                      x_lab = NULL,
                      y_lab = "Temperature (°C)",
                      legend_title = NULL,
                      point_size = 0.8,
                      point_alpha = 0.4,
                      vlines_red = NULL,
                      vlines_black = NULL,
                      vline_lwd = 0.4,
                      vline_red_linetype   = "solid",
                      vline_black_linetype = "dashed",
                      vline_red_colour     = "#9b1c31",
                      vline_black_colour   = "#000000") {

  x     <- enquo(x)
  y     <- enquo(y)
  group <- enquo(group)

  p <- ggplot(df, aes(x = !!x, y = !!y,
                      colour = factor(!!group),
                      group  = factor(!!group))) +
    geom_point(size = point_size, alpha = point_alpha, shape = 16) +
    theme_bw(base_size = base_size) +
    theme(
      legend.position  = legend_position,
      panel.grid.minor = element_blank(),
      panel.grid.major = element_line(colour = "grey92", linewidth = 0.3),
      axis.text        = element_text(size = 6),
      axis.title       = element_text(size = 6),
      legend.text      = element_text(size = 8),
      legend.key.size  = unit(0.35, "cm")
    ) +
    labs(x      = x_lab,
         y      = y_lab,
         colour = legend_title)

  if (!is.null(cols)) {
    p <- p + scale_colour_manual(values = cols)
  }

  if (!is.null(vlines_red)) {
    p <- p + geom_vline(xintercept = vlines_red,
                        colour    = vline_red_colour,
                        linetype  = vline_red_linetype,
                        linewidth = vline_lwd,
                        alpha     = 0.8)
  }

  if (!is.null(vlines_black)) {
    p <- p + geom_vline(xintercept = vlines_black,
                        colour    = vline_black_colour,
                        linetype  = vline_black_linetype,
                        linewidth = vline_lwd,
                        alpha     = 0.8)
  }

  return(p)
}


fig_temp_chamber_raw <- plot_temp(
  df           = Cleaned_Data,
  x            = DOY.initial_value,
  y            = pile_moist,
  group        = Chamber_Corrected,
  cols         = chamber_cols,
  y_lab        = expression("Volumetric water content (m"^3*" m"^-3*")"),
  x_lab        = "Day of year",
  legend_title = "Chamber",
  vlines_red   = turns_both_red,
  vlines_black = turns_control_black,
  vline_red_linetype   = "solid",
  vline_black_linetype = "dashed"
)

fig_temp_chamber_raw

## Warning: Removed 10118 rows containing missing values or values outside the scale range
## (`geom_point()`).

chamber_plots <- Cleaned_Data %>%
  split(.$Chamber_Corrected) %>%
  imap(~ plot_temp(
    df           = .x,
    x            = DOY.initial_value,
    y            = pile_moist,
    group        = Chamber_Corrected,
    y_lab        = expression("Volumetric water content (m"^3*" m"^-3*")"),
    x_lab        = "Day of year",
    legend_title = "Chamber",
    cols         = chamber_cols,
    vlines_red   = turns_both_red,
    vlines_black = turns_control_black
  ) +
    ggtitle(.y) +
    theme(legend.position = "none")
  )

left_col  <- chamber_plots[["C1"]] / chamber_plots[["C2"]] / chamber_plots[["C3"]]
right_col <- chamber_plots[["C4"]] / chamber_plots[["C5"]] / chamber_plots[["C6"]]

fig_temp_individual <- left_col | right_col
fig_temp_individual

## Warning: Removed 288 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 1723 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 768 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 1259 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 3138 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 2942 rows containing missing values or values outside the scale range
## (`geom_point()`).

chamber_plots_early <- Cleaned_Data %>%
  filter(DOY < 159) %>%
  split(.$Chamber_Corrected) %>%
  imap(~ plot_temp(
    df           = .x,
    x            = DOY.initial_value,
    y            = pile_moist,
    group        = Chamber_Corrected,
    y_lab        = expression("Volumetric water content (m"^3*" m"^-3*")"),
    x_lab        = "Day of year",
    legend_title = "Chamber",
    cols         = chamber_cols)
 +
      
    ggtitle(.y) +
    theme(legend.position = "none")
  )

left_col_early  <- chamber_plots_early[["C1"]] / chamber_plots_early[["C2"]] / chamber_plots_early[["C3"]]
right_col_early <- chamber_plots_early[["C4"]] / chamber_plots_early[["C5"]] / chamber_plots_early[["C6"]]

fig_temp_individual_early <- left_col_early | right_col_early
fig_temp_individual_early

## Warning: Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 170 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 175 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 55 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 140 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 185 rows containing missing values or values outside the scale range
## (`geom_point()`).

chamber_plots_early_II <- Cleaned_Data %>%
  filter(DOY < 173) %>%
  split(.$Chamber_Corrected) %>%
  imap(~ plot_temp(
    df           = .x,
    x            = DOY.initial_value,
    y            = pile_moist,
    group        = Chamber_Corrected,
    y_lab        = expression("Volumetric water content (m"^3*" m"^-3*")"),
    x_lab        = "Day of year",
    legend_title = "Chamber",
    cols         = chamber_cols  ) +
    ggtitle(.y) +
    theme(legend.position = "none")
  )

left_col_early_II  <- chamber_plots_early_II[["C1"]] / chamber_plots_early_II[["C2"]] / chamber_plots_early_II[["C3"]]
right_col_early_II <- chamber_plots_early_II[["C4"]] / chamber_plots_early_II[["C5"]] / chamber_plots_early_II[["C6"]]

fig_temp_individual_early_II <- left_col_early_II | right_col_early_II
fig_temp_individual_early_II

## Warning: Removed 272 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 174 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 753 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 226 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 670 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 398 rows containing missing values or values outside the scale range
## (`geom_point()`).

Adding data _ by trace gas where each has individual emissions data

library(readxl)

co2_raw <-  read.csv("C:/Users/KAUFMADA/Documents/R_Files/windrow-fluxes-co2-model-improvements/windrow-fluxes-co2-model-improvements/data_clean/CO2_R_2_Filtered.csv")

ch4_raw <-  read.csv("C:/Users/KAUFMADA/Documents/R_Files/windrow-fluxes-co2-model-improvements/windrow-fluxes-co2-model-improvements/data_clean/CH4_R_2_Filtered.csv")

n2o_raw <- read.csv("C:/Users/KAUFMADA/Documents/R_Files/windrow-fluxes-co2-model-improvements/windrow-fluxes-co2-model-improvements/data_clean/N2O_R_2_Filtered.csv")

Plots flux values for each Trace Gas by Chamber

# Plotting Flux Vales by chamber for each trace gas. 

co2_clean <- co2_raw %>%
  filter(CO2_DRY.initial_value >= 0)

# Plot
chamber_plots_co2 <- co2_clean %>%
  split(.$Chamber_Corrected) %>%
  imap(~ plot_temp(
    df           = .x,
    x            = DOY,
    y            = FCO2_DRY.LIN,
    group        = Chamber_Corrected,
    y_lab        = expression("CO"[2]~"Flux"),
    x_lab        = "Day of year",
    legend_title = "Chamber",
    cols         = chamber_cols
  ) +
    ggtitle(.y) +
    theme(legend.position = "none")
  )

left_col_co2  <- chamber_plots_co2[["C1"]] / chamber_plots_co2[["C2"]] / chamber_plots_co2[["C3"]]
right_col_co2 <- chamber_plots_co2[["C4"]] / chamber_plots_co2[["C5"]] / chamber_plots_co2[["C6"]]

fig_co2_individual <- left_col_co2 | right_col_co2
fig_co2_individual

ch4_clean <- ch4_raw %>%
  filter(CH4_DRY.initial_value >= 0,
         CH4_DRY.initial_value <= 100000)
# Plot
chamber_plots_ch4 <- ch4_clean %>%
  split(.$Chamber_Corrected) %>%
  imap(~ plot_temp(
    df           = .x,
    x            = DOY,
    y            = FCH4_DRY.LIN,
    group        = Chamber_Corrected,
    y_lab        = expression("CH"[4]~"Flux"),
    x_lab        = "Day of year",
    legend_title = "Chamber",
    cols         = chamber_cols
  ) +
    ggtitle(.y) +
    theme(legend.position = "none")
  )

left_col_ch4  <- chamber_plots_ch4[["C1"]] / chamber_plots_ch4[["C2"]] / chamber_plots_ch4[["C3"]]
right_col_ch4 <- chamber_plots_ch4[["C4"]] / chamber_plots_ch4[["C5"]] / chamber_plots_ch4[["C6"]]

fig_ch4_individual <- left_col_ch4 | right_col_ch4
fig_ch4_individual

N2O_clean <- n2o_raw %>%
  filter(N2O_DRY.initial_value >= 0,
         N2O_DRY.initial_value <= 100000)
# Plot
chamber_plots_N2O <- N2O_clean %>%
  split(.$Chamber_Corrected) %>%
  imap(~ plot_temp(
    df           = .x,
    x            = DOY,
    y            = FN2O_DRY.LIN,
    group        = Chamber_Corrected,
    y_lab        = expression("N"[2]~"O Flux"),
    x_lab        = "Day of year",
    legend_title = "Chamber",
    cols         = chamber_cols
  ) +
    ggtitle(.y) +
    theme(legend.position = "none")
  )

left_col_N2O  <- chamber_plots_N2O[["C1"]] / chamber_plots_N2O[["C2"]] / chamber_plots_N2O[["C3"]]
right_col_N2O <- chamber_plots_N2O[["C4"]] / chamber_plots_N2O[["C5"]] / chamber_plots_N2O[["C6"]]

fig_N2O_individual <- left_col_N2O | right_col_N2O
fig_N2O_individual

Plots where temperature and co2 emissions are plotted on the same graph individually by pile.

chambers <- unique(co2_raw$Chamber_Corrected)

chamber_plots_overlay <- setNames(lapply(chambers, function(ch) {
  
  co2_ch  <- co2_clean  %>% filter(Chamber_Corrected == ch)
  temp_ch <- Cleaned_Data  %>% filter(Chamber_Corrected == ch)

  # Scale factor per chamber
  scale_factor <- max(co2_ch$FCO2_DRY.LIN, na.rm = TRUE) / 
                  max(temp_ch$pile_temp,    na.rm = TRUE)
  
  ggplot() +
    geom_point(data = co2_ch,
               aes(x = DOY, y = FCO2_DRY.LIN),
               colour = chamber_cols[[ch]], size = 0.8, alpha = 0.4, shape = 16) +
    geom_point(data = temp_ch,
               aes(x = DOY.initial_value, y = pile_temp * scale_factor),
               colour = "black", size = 0.8, alpha = 0.4, shape = 17) +
    scale_y_continuous(
      name = expression("CO"[2]~"Flux"),
      sec.axis = sec_axis(~ . / scale_factor,
                          name = expression("Temperature (°C)"))
    ) +
    theme_bw(base_size = 11) +
    theme(
      legend.position    = "none",
      panel.grid.minor   = element_blank(),
      panel.grid.major   = element_line(colour = "grey92", linewidth = 0.3),
      axis.title.y.right = element_text(colour = "black"),
      axis.text.y.right  = element_text(colour = "black")
    ) +
    labs(x = "Day of year") +
    ggtitle(ch)
  
}), chambers)

left_col_overlay  <- chamber_plots_overlay[["C1"]] / chamber_plots_overlay[["C2"]] / chamber_plots_overlay[["C3"]]
right_col_overlay <- chamber_plots_overlay[["C4"]] / chamber_plots_overlay[["C5"]] / chamber_plots_overlay[["C6"]]

fig_overlay <- left_col_overlay | right_col_overlay
fig_overlay

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 776 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 266 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 1368 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 62 rows containing missing values or values outside the scale range
## (`geom_point()`).

Plots where temperature and ch4 emissions are plotted on the same graph individually by pile. Log transformed for better scales

chamber_plots_overlay_CH4 <- setNames(lapply(chambers, function(ch) {
  
  CH4_ch  <- ch4_clean   %>% filter(Chamber_Corrected == ch)
  temp_ch <- Cleaned_Data %>% filter(Chamber_Corrected == ch)
  
  # Scale factor on log scale
  scale_factor <- max(log1p(CH4_ch$FCH4_DRY.LIN), na.rm = TRUE) / 
                  max(temp_ch$pile_temp,            na.rm = TRUE)
  
  ggplot() +
    geom_point(data = CH4_ch,
               aes(x = DOY, y = log1p(FCH4_DRY.LIN)),
               colour = chamber_cols[[ch]], size = 0.8, alpha = 0.4, shape = 16) +
    geom_point(data = temp_ch,
               aes(x = DOY.initial_value, y = pile_temp * scale_factor),
               colour = "black", size = 0.8, alpha = 0.4, shape = 17) +
    scale_y_continuous(
      name = expression("CH4"[4]*" Flux (log scale)"),
      sec.axis = sec_axis(~ . / scale_factor,
                          name = expression("Temperature (°C)"))
    ) +
    theme_bw(base_size = 11) +
    theme(
      legend.position    = "none",
      panel.grid.minor   = element_blank(),
      panel.grid.major   = element_line(colour = "grey92", linewidth = 0.3),
      axis.title.y.right = element_text(colour = "black"),
      axis.text.y.right  = element_text(colour = "black")
    ) +
    labs(x = "Day of year") +
    ggtitle(ch)
  
}), chambers)

left_col_overlay_CH4  <- chamber_plots_overlay_CH4[["C1"]] / chamber_plots_overlay_CH4[["C2"]] / chamber_plots_overlay_CH4[["C3"]]
right_col_overlay_CH4 <- chamber_plots_overlay_CH4[["C4"]] / chamber_plots_overlay_CH4[["C5"]] / chamber_plots_overlay_CH4[["C6"]]

fig_overlay_CH4 <- left_col_overlay_CH4 | right_col_overlay_CH4
fig_overlay_CH4

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 776 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 266 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 1368 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 62 rows containing missing values or values outside the scale range
## (`geom_point()`).

overlaid temperature n2o data log scale

chamber_plots_overlay_n2o <- setNames(lapply(chambers, function(ch) {
  
  n2o_ch  <- N2O_clean   %>% filter(Chamber_Corrected == ch)
  temp_ch <- Cleaned_Data %>% filter(Chamber_Corrected == ch)
  
  # Scale factor on log scale
  scale_factor <- max(log1p(n2o_ch$FN2O_DRY.LIN), na.rm = TRUE) / 
                  max(temp_ch$pile_temp,            na.rm = TRUE)
  
  ggplot() +
    geom_point(data = n2o_ch,
               aes(x = DOY, y = log1p(FN2O_DRY.LIN)),
               colour = chamber_cols[[ch]], size = 0.8, alpha = 0.4, shape = 16) +
    geom_point(data = temp_ch,
               aes(x = DOY.initial_value, y = pile_temp * scale_factor),
               colour = "black", size = 0.8, alpha = 0.4, shape = 17) +
    scale_y_continuous(
      name = expression("N"[2]*"O Flux (log scale)"),
      sec.axis = sec_axis(~ . / scale_factor,
                          name = expression("Temperature (°C)"))
    ) +
    theme_bw(base_size = 11) +
    theme(
      legend.position    = "none",
      panel.grid.minor   = element_blank(),
      panel.grid.major   = element_line(colour = "grey92", linewidth = 0.3),
      axis.title.y.right = element_text(colour = "black"),
      axis.text.y.right  = element_text(colour = "black")
    ) +
    labs(x = "Day of year") +
    ggtitle(ch)
  
}), chambers)

left_col_overlay_n2o  <- chamber_plots_overlay_n2o[["C1"]] / chamber_plots_overlay_n2o[["C2"]] / chamber_plots_overlay_n2o[["C3"]]
right_col_overlay_n2o <- chamber_plots_overlay_n2o[["C4"]] / chamber_plots_overlay_n2o[["C5"]] / chamber_plots_overlay_n2o[["C6"]]

fig_overlay_n2o <- left_col_overlay_n2o | right_col_overlay_n2o
fig_overlay_n2o

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 776 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 266 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 1368 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 62 rows containing missing values or values outside the scale range
## (`geom_point()`).

Creating Tables with Pile Variables:

pile_vars_temp <- Cleaned_Data %>%
  filter(!is.na(pile_temp)) %>%
  group_by(Pile) %>%
  summarise(
    n_obs = n(),
    mean_temp = mean(pile_temp),
    sd_temp   = sd(pile_temp),
    se_temp   = sd_temp/sqrt(n_obs)
  )

pile_vars_water <- Cleaned_Data %>%
  filter(!is.na(pile_moist)) %>%
  group_by(Pile) %>%
  summarise(
    n_obs = n(),
    mean_moist = mean(pile_moist),
    sd_moist   = sd(pile_moist),
    se_moist   = sd_moist/sqrt(n_obs)
  )

Summary statistics for volumetric water content by pile. Summary statistics for Temperature by pile. Box plots by pile dunn_results test by pile: to see which pairs are significantly different for temp

Summary statistics for volumetric water content by pile.
Pile	n_obs	mean_moist	sd_moist	se_moist
C	7195	0.365	0.252	0.003
E	3777	0.550	0.231	0.004

Summary statistics for Temperature by pile.
Pile	n_obs	mean_temp	sd_temp	se_temp
C	9192	54.069	8.992	0.094
E	9420	51.778	12.812	0.132

## Warning: Removed 2478 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## 
##  Kruskal-Wallis rank sum test
## 
## data:  pile_temp by Chamber_Corrected
## Kruskal-Wallis chi-squared = 2059.9, df = 5, p-value < 2.2e-16

## Warning: package 'dunn.test' was built under R version 4.5.3

##   Kruskal-Wallis rank sum test
## 
## data: x and group
## Kruskal-Wallis chi-squared = 2059.9229, df = 5, p-value = 0
## 
## 
##                    Dunn's Pairwise Comparison of x by group                   
##                                  (Bonferroni)                                 
## Col Mean-│
## Row Mean │         C1         C2         C3         C4         C5
## ─────────┼───────────────────────────────────────────────────────
##       C2 │   14.01824
##          │     0.0000*
##          │
##       C3 │   7.369934  -6.733721
##          │     0.0000*    0.0000*
##          │
##       C4 │   14.72761   0.157561   7.153147
##          │     0.0000*    1.0000     0.0000*
##          │
##       C5 │  -15.06356  -27.64340  -21.73351  -28.69974
##          │     0.0000*    0.0000*    0.0000*    0.0000*
##          │
##       C6 │   29.05269   13.71717   21.16311   14.12608   42.03163
##          │     0.0000*    0.0000*    0.0000*    0.0000*    0.0000*
## 
## α = 0.05
## Reject Ho if p ≤ α/2, where p = Pr(Z ≥ |z|)

## Warning: package 'rstatix' was built under R version 4.5.3

## 
## Attaching package: 'rstatix'
## 
## The following object is masked from 'package:stats':
## 
##     filter

## Warning: package 'ggpubr' was built under R version 4.5.3

## 
## Attaching package: 'ggpubr'
## 
## The following object is masked from 'package:cowplot':
## 
##     get_legend

## Warning: Removed 2478 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 2478 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

Measuring days in thermophillic stage by chamber

Cleaned_Data %>%
  group_by(Chamber_Corrected) %>%
  summarise(
    max_temp         = max(pile_temp, na.rm = TRUE),
    n_obs_above_70   = sum(pile_temp > 70, na.rm = TRUE),
    n_obs_above_75   = sum(pile_temp > 75, na.rm = TRUE),
    first_above_70   = min(DOY.initial_value[pile_temp > 70], na.rm = TRUE),
    last_above_70    = max(DOY.initial_value[pile_temp > 70], na.rm = TRUE)
  )

## # A tibble: 6 × 6
##   Chamber_Corrected max_temp n_obs_above_70 n_obs_above_75 first_above_70
##   <chr>                <dbl>          <int>          <int>          <dbl>
## 1 C1                    71.6              3              0           153.
## 2 C2                    82.3            320            190           160.
## 3 C3                    73.4             49              0           153.
## 4 C4                    77.5            128              3           160.
## 5 C5                    75.4            163             19           154.
## 6 C6                    73.4             43              0           160.
## # ℹ 1 more variable: last_above_70 <dbl>

Cleaned_Data %>%
  group_by(Chamber_Corrected, floor(DOY.initial_value)) %>%
  summarise(daily_mean_temp = mean(pile_temp, na.rm = TRUE), .groups = "drop") %>%
  rename(DOY_day = `floor(DOY.initial_value)`) %>%
  group_by(Chamber_Corrected) %>%
  arrange(DOY_day) %>%
  mutate(
    above_55 = daily_mean_temp > 55,
    # identify consecutive groups
    group_id = cumsum(!above_55)
  ) %>%
  filter(above_55) %>%
  group_by(Chamber_Corrected, group_id) %>%
  summarise(
    consecutive_days = n(),
    start_DOY = min(DOY_day),
    end_DOY   = max(DOY_day),
    .groups = "drop"
  ) %>%
  group_by(Chamber_Corrected) %>%
  summarise(
    max_consecutive_days_above_55 = max(consecutive_days),
    start_DOY = start_DOY[which.max(consecutive_days)],
    end_DOY   = end_DOY[which.max(consecutive_days)]
  )

## # A tibble: 6 × 4
##   Chamber_Corrected max_consecutive_days_above_55 start_DOY end_DOY
##   <chr>                                     <int>     <dbl>   <dbl>
## 1 C1                                           17       195     213
## 2 C2                                           20       152     171
## 3 C3                                           20       152     171
## 4 C4                                           31       173     220
## 5 C5                                           43       159     227
## 6 C6                                           13       219     231

Looking at % measurements removed in cleaning - not sure about this need to review what processing was used to get to ch4/n2o/co2 raw as I think there was some initial cleaning.

library(gt)

## Warning: package 'gt' was built under R version 4.5.3

## 
## Attaching package: 'gt'

## The following object is masked from 'package:cowplot':
## 
##     as_gtable

# Build summary table
obs_summary <- ch4_raw %>%
  group_by(Chamber_Corrected) %>%
  summarise(n_raw = n()) %>%
  left_join(
    ch4_clean %>% group_by(Chamber_Corrected) %>% summarise(n_clean = n())
  ) %>%
  mutate(
    n_removed   = n_raw - n_clean,
    pct_removed = round(n_removed / n_raw * 100, 1)
  ) %>%
  left_join(
    ch4_clean %>%
      group_by(Chamber_Corrected) %>%
      summarise(
        median_ch4 = round(median(FCH4_DRY.LIN, na.rm = TRUE), 1),
        iqr_ch4    = round(IQR(FCH4_DRY.LIN,    na.rm = TRUE), 1),
        max_ch4    = round(max(FCH4_DRY.LIN,     na.rm = TRUE), 1)
      )
  ) %>%
  left_join(
    Cleaned_Data %>%
      group_by(Chamber_Corrected) %>%
      summarise(median_temp = round(median(pile_temp, na.rm = TRUE), 1))
  )

## Joining with `by = join_by(Chamber_Corrected)`

## Joining with `by = join_by(Chamber_Corrected)`
## Joining with `by = join_by(Chamber_Corrected)`

# Render with gt
obs_summary %>%
  gt() %>%
  cols_label(
    Chamber_Corrected = "Chamber",
    n_raw             = "Raw Obs.",
    n_clean           = "Clean Obs.",
    n_removed         = "Removed",
    pct_removed       = "% Removed",
    median_ch4        = "Median CH₄ Flux",
    iqr_ch4           = "IQR CH₄ Flux",
    max_ch4           = "Max CH₄ Flux",
    median_temp       = "Median Temp (°C)"
  ) %>%
  tab_header(
    title    = "CH₄ Data Quality and Summary Statistics by Chamber",
    subtitle = "Raw vs. cleaned observations, removal rates, and flux summaries"
  ) %>%
  tab_spanner(
    label   = "Observation Count",
    columns = c(n_raw, n_clean, n_removed, pct_removed)
  ) %>%
  tab_spanner(
    label   = "CH₄ Flux (nmol m⁻² s⁻¹)",
    columns = c(median_ch4, iqr_ch4, max_ch4)
  ) %>%
  tab_footnote(
    footnote = "Raw observations: all measurements collected by the multiplexer. Clean observations: remaining after removal of negative initial values and values exceeding 50,000 ppb/ppm. % Removed reflects data lost to quality filtering.",
    locations = cells_title()
  ) %>%
  tab_style(
    style     = cell_fill(color = "#f7f7f7"),
    locations = cells_body(rows = seq(1, nrow(obs_summary), 2))
  ) %>%
  tab_style(
    style     = cell_text(weight = "bold"),
    locations = cells_column_labels()
  ) %>%
  opt_table_font(font = "Times New Roman")

Chamber	Observation Count				CH₄ Flux (nmol m⁻² s⁻¹)			Median Temp (°C)
CH₄ Data Quality and Summary Statistics by Chamber¹
Raw vs. cleaned observations, removal rates, and flux summaries¹
Chamber	Raw Obs.	Clean Obs.	Removed	% Removed	Median CH₄ Flux	IQR CH₄ Flux	Max CH₄ Flux	Median Temp (°C)
C1	1498	1470	28	1.9	1032.2	2447.4	49978.7	56.2
C2	2248	2101	147	6.5	9452.2	12937.6	89459.9	51.0
C3	1609	1308	301	18.7	265.3	270.9	14499.1	54.3
C4	1348	1341	7	0.5	721.2	1982.1	24203.6	51.0
C5	1626	1280	346	21.3	2646.1	4822.8	73778.6	62.7
C6	1963	1505	458	23.3	2055.2	5205.9	41800.1	48.8
¹ Raw observations: all measurements collected by the multiplexer. Clean observations: remaining after removal of negative initial values and values exceeding 50,000 ppb/ppm. % Removed reflects data lost to quality filtering.

library(gt)

# Build summary table
obs_summary <- ch4_raw %>%
  group_by(Chamber_Corrected) %>%
  summarise(n_raw_ch4 = n()) %>%
  left_join(
    ch4_clean %>% group_by(Chamber_Corrected) %>% summarise(n_clean_ch4 = n())
  ) %>%
  mutate(
    n_removed_ch4   = n_raw_ch4 - n_clean_ch4,
    pct_removed_ch4 = round(n_removed_ch4 / n_raw_ch4 * 100, 1)
  ) %>%
  left_join(
    co2_raw %>%
      group_by(Chamber_Corrected) %>%
      summarise(n_raw_co2 = n())
  ) %>%
  left_join(
    co2_clean %>% group_by(Chamber_Corrected) %>% summarise(n_clean_co2 = n())
  ) %>%
  mutate(
    n_removed_co2   = n_raw_co2 - n_clean_co2,
    pct_removed_co2 = round(n_removed_co2 / n_raw_co2 * 100, 1)
  ) %>%
  left_join(
    n2o_raw %>%
      group_by(Chamber_Corrected) %>%
      summarise(n_raw_n2o = n())
  ) %>%
  left_join(
    N2O_clean %>% group_by(Chamber_Corrected) %>% summarise(n_clean_n2o = n())
  ) %>%
  mutate(
    n_removed_n2o   = n_raw_n2o - n_clean_n2o,
    pct_removed_n2o = round(n_removed_n2o / n_raw_n2o * 100, 1)
  ) %>%
  # Flux summaries
  left_join(
    ch4_clean %>%
      group_by(Chamber_Corrected) %>%
      summarise(
        median_ch4 = round(median(FCH4_DRY.LIN, na.rm = TRUE), 1),
        iqr_ch4    = round(IQR(FCH4_DRY.LIN,    na.rm = TRUE), 1)
      )
  ) %>%
  left_join(
    co2_clean %>%
      group_by(Chamber_Corrected) %>%
      summarise(
        median_co2 = round(median(FCO2_DRY.LIN, na.rm = TRUE), 1),
        iqr_co2    = round(IQR(FCO2_DRY.LIN,    na.rm = TRUE), 1)
      )
  ) %>%
  left_join(
    N2O_clean %>%
      group_by(Chamber_Corrected) %>%
      summarise(
        median_n2o = round(median(FN2O_DRY.LIN, na.rm = TRUE), 1),
        iqr_n2o    = round(IQR(FN2O_DRY.LIN,    na.rm = TRUE), 1)
      )
  ) %>%
  left_join(
    Cleaned_Data %>%
      group_by(Chamber_Corrected) %>%
      summarise(median_temp = round(median(pile_temp, na.rm = TRUE), 1))
  )

## Joining with `by = join_by(Chamber_Corrected)`
## Joining with `by = join_by(Chamber_Corrected)`
## Joining with `by = join_by(Chamber_Corrected)`
## Joining with `by = join_by(Chamber_Corrected)`
## Joining with `by = join_by(Chamber_Corrected)`
## Joining with `by = join_by(Chamber_Corrected)`
## Joining with `by = join_by(Chamber_Corrected)`
## Joining with `by = join_by(Chamber_Corrected)`
## Joining with `by = join_by(Chamber_Corrected)`

# Render with gt
obs_summary %>%
  gt() %>%
  cols_label(
    Chamber_Corrected = "Chamber",
    n_raw_ch4         = "Raw",
    n_clean_ch4       = "Clean",
    pct_removed_ch4   = "% Removed",
    n_raw_co2         = "Raw",
    n_clean_co2       = "Clean",
    pct_removed_co2   = "% Removed",
    n_raw_n2o         = "Raw",
    n_clean_n2o       = "Clean",
    pct_removed_n2o   = "% Removed",
    median_ch4        = "Median",
    iqr_ch4           = "IQR",
    median_co2        = "Median",
    iqr_co2           = "IQR",
    median_n2o        = "Median",
    iqr_n2o           = "IQR",
    median_temp       = "Median Temp (°C)"
  ) %>%
  cols_hide(c(n_removed_ch4, n_removed_co2, n_removed_n2o)) %>%
  tab_header(
    title    = "Data Quality and Flux Summary Statistics by Chamber",
    subtitle = "Raw vs. cleaned observations and median flux values for all three trace gases"
  ) %>%
  tab_spanner(
    label   = "CH₄ Observations",
    columns = c(n_raw_ch4, n_clean_ch4, pct_removed_ch4)
  ) %>%
  tab_spanner(
    label   = "CO₂ Observations",
    columns = c(n_raw_co2, n_clean_co2, pct_removed_co2)
  ) %>%
  tab_spanner(
    label   = "N₂O Observations",
    columns = c(n_raw_n2o, n_clean_n2o, pct_removed_n2o)
  ) %>%
  tab_spanner(
    label   = "CH₄ Flux",
    columns = c(median_ch4, iqr_ch4)
  ) %>%
  tab_spanner(
    label   = "CO₂ Flux",
    columns = c(median_co2, iqr_co2)
  ) %>%
  tab_spanner(
    label   = "N₂O Flux",
    columns = c(median_n2o, iqr_n2o)
  ) %>%
  tab_footnote(
    footnote = "Raw: all multiplexer observations. Clean: remaining after quality filtering (removal of negative initial values; CH₄ capped at 50,000 ppb). % Removed reflects data lost to filtering. Flux units: nmol m⁻² s⁻¹. Median and IQR used given heavily right-skewed flux distributions.",
    locations = cells_title()
  ) %>%
  tab_style(
    style     = cell_fill(color = "#f7f7f7"),
    locations = cells_body(rows = seq(1, 6, 2))
  ) %>%
  tab_style(
    style     = cell_text(weight = "bold"),
    locations = cells_column_labels()
  ) %>%
  opt_table_font(font = "Times New Roman")

Chamber	CH₄ Observations			CO₂ Observations			N₂O Observations			CH₄ Flux		CO₂ Flux		N₂O Flux		Median Temp (°C)
Data Quality and Flux Summary Statistics by Chamber¹
Raw vs. cleaned observations and median flux values for all three trace gases¹
Chamber	Raw	Clean	% Removed	Raw	Clean	% Removed	Raw	Clean	% Removed	Median	IQR	Median	IQR	Median	IQR	Median Temp (°C)
C1	1498	1470	1.9	1769	1711	3.3	2146	2146	0	1032.2	2447.4	111.3	97.7	55.6	81.1	56.2
C2	2248	2101	6.5	1683	1682	0.1	2638	2638	0	9452.2	12937.6	240.6	292.2	121.6	229.1	51.0
C3	1609	1308	18.7	1612	1596	1.0	2122	2122	0	265.3	270.9	93.3	111.3	37.8	80.0	54.3
C4	1348	1341	0.5	1413	1369	3.1	2644	2644	0	721.2	1982.1	116.3	135.2	34.4	101.9	51.0
C5	1626	1280	21.3	1655	1646	0.5	2306	2305	0	2646.1	4822.8	196.1	208.6	35.1	70.4	62.7
C6	1963	1505	23.3	1147	1118	2.5	1961	1961	0	2055.2	5205.9	126.5	127.3	36.5	87.6	48.8
¹ Raw: all multiplexer observations. Clean: remaining after quality filtering (removal of negative initial values; CH₄ capped at 50,000 ppb). % Removed reflects data lost to filtering. Flux units: nmol m⁻² s⁻¹. Median and IQR used given heavily right-skewed flux distributions.

Making Chart with Average values by Pile and SD,SE for each chamber and trace gas

# CO2
co2_clean %>%
  group_by(Chamber_Corrected) %>%
  summarise(mean_co2 = mean(FCO2_DRY.LIN, na.rm = TRUE))

## # A tibble: 6 × 2
##   Chamber_Corrected mean_co2
##   <chr>                <dbl>
## 1 C1                    127.
## 2 C2                    323.
## 3 C3                    125.
## 4 C4                    156.
## 5 C5                    243.
## 6 C6                    164.

# CH4
ch4_clean %>%
  group_by(Chamber_Corrected) %>%
  summarise(mean_ch4 = mean(FCH4_DRY.LIN, na.rm = TRUE))

## # A tibble: 6 × 2
##   Chamber_Corrected mean_ch4
##   <chr>                <dbl>
## 1 C1                   2435.
## 2 C2                  11190.
## 3 C3                    418.
## 4 C4                   1684.
## 5 C5                   4339.
## 6 C6                   4720.

# N2O
N2O_clean %>%
  group_by(Chamber_Corrected) %>%
  summarise(mean_n2o = mean(FN2O_DRY.LIN, na.rm = TRUE))

## # A tibble: 6 × 2
##   Chamber_Corrected mean_n2o
##   <chr>                <dbl>
## 1 C1                   112. 
## 2 C2                   254. 
## 3 C3                    82.0
## 4 C4                    78.4
## 5 C5                    97.9
## 6 C6                   126.

pile_vars_co2 <- co2_raw %>%
  filter(!is.na(FCO2_DRY.LIN)) %>%
  group_by(Pile) %>%
  summarise(
    n_obs = n(),
    mean_co2 = mean(FCO2_DRY.LIN),
    sd_co2 = sd(FCO2_DRY.LIN),
    se_co2   = sd_co2/sqrt(n_obs)
  )

pile_vars_ch4 <- ch4_raw %>%
  filter(!is.na(FCH4_DRY.LIN),
         CH4_DRY.initial_value >= 0,
         CH4_DRY.initial_value <= 50000) %>%
  group_by(Pile) %>%
  summarise(
    n_obs    = n(),
    mean_ch4 = mean(FCH4_DRY.LIN),
    sd_ch4   = sd(FCH4_DRY.LIN),
    se_ch4   = sd_ch4 / sqrt(n_obs)
  )
pile_vars_n2o <- n2o_raw %>%
  filter(!is.na(FN2O_DRY.LIN)) %>%
  group_by(Pile) %>%
  summarise(
    n_obs = n(),
    mean_n2o = mean(FN2O_DRY.LIN),
    sd_n2o   = sd(FN2O_DRY.LIN),
    se_n2o   = sd_n2o/sqrt(n_obs)
  )

# CH4
ch4_clean %>%
  group_by(Chamber_Corrected) %>%
  summarise(
    median_ch4 = median(FCH4_DRY.LIN, na.rm = TRUE),
    iqr_ch4    = IQR(FCH4_DRY.LIN, na.rm = TRUE),
    max_ch4    = max(FCH4_DRY.LIN, na.rm = TRUE)
  )

## # A tibble: 6 × 4
##   Chamber_Corrected median_ch4 iqr_ch4 max_ch4
##   <chr>                  <dbl>   <dbl>   <dbl>
## 1 C1                     1032.   2447.  49979.
## 2 C2                     9452.  12938.  89460.
## 3 C3                      265.    271.  14499.
## 4 C4                      721.   1982.  24204.
## 5 C5                     2646.   4823.  73779.
## 6 C6                     2055.   5206.  41800.

# CO2
co2_clean %>%
  group_by(Chamber_Corrected) %>%
  summarise(
    median_co2 = median(FCO2_DRY.LIN, na.rm = TRUE),
    iqr_co2    = IQR(FCO2_DRY.LIN, na.rm = TRUE),
    max_co2    = max(FCO2_DRY.LIN, na.rm = TRUE)
  )

## # A tibble: 6 × 4
##   Chamber_Corrected median_co2 iqr_co2 max_co2
##   <chr>                  <dbl>   <dbl>   <dbl>
## 1 C1                     111.     97.7   1100.
## 2 C2                     241.    292.    2312.
## 3 C3                      93.3   111.     548.
## 4 C4                     116.    135.    1911.
## 5 C5                     196.    209.    1800.
## 6 C6                     127.    127.    1272.

# N2O
N2O_clean %>%
  group_by(Chamber_Corrected) %>%
  summarise(
    median_n2o = median(FN2O_DRY.LIN, na.rm = TRUE),
    iqr_n2o    = IQR(FN2O_DRY.LIN, na.rm = TRUE),
    max_n2o    = max(FN2O_DRY.LIN, na.rm = TRUE)
  )

## # A tibble: 6 × 4
##   Chamber_Corrected median_n2o iqr_n2o max_n2o
##   <chr>                  <dbl>   <dbl>   <dbl>
## 1 C1                      55.6    81.1   2578.
## 2 C2                     122.    229.    3564.
## 3 C3                      37.8    80.0   1621.
## 4 C4                      34.4   102.    2513.
## 5 C5                      35.1    70.4   1681.
## 6 C6                      36.5    87.6   5127.

Displaying Average Values for Each Trace Gas by Pile:

kable(
  pile_vars_co2,
  digits = 3,
  caption = "Summary statistics for co2 flux by pile."
) %>%
  kable_styling(
    full_width = FALSE,
    position = "center",
    bootstrap_options = c("bordered", "striped")
  )

Summary statistics for co2 flux by pile.
Pile	n_obs	mean_co2	sd_co2	se_co2
C	5064	190.076	202.288	2.843
E	4215	191.757	193.987	2.988

kable(
  pile_vars_ch4,
  digits = 3,
  caption = "Summary statistics for ch4 flux by pile."
) %>%
  kable_styling(
    full_width = FALSE,
    position = "center",
    bootstrap_options = c("bordered", "striped")
  )

Summary statistics for ch4 flux by pile.
Pile	n_obs	mean_ch4	sd_ch4	se_ch4
C	4252	4115.308	8180.516	125.454
E	3673	2754.121	5028.936	82.979

kable(
  pile_vars_n2o,
  digits = 3,
  caption = "Summary statistics for n2o flux by pile."
) %>%
  kable_styling(
    full_width = FALSE,
    position = "center",
    bootstrap_options = c("bordered", "striped")
  )

Summary statistics for n2o flux by pile.
Pile	n_obs	mean_n2o	sd_n2o	se_n2o
C	6906	157.089	295.036	3.550
E	6911	98.481	245.689	2.955

Creating Daily and Weekly Plots for all Fluxes

# N2O
daily_n2o <- n2o_raw %>%
  mutate(DOY = floor(DOY)) %>%        # collapse fractional DOY
  group_by(Pile, DOY) %>%
  summarise(
    n_obs       = sum(!is.na(FN2O_DRY.LIN)),
    n2o_mean    = mean(FN2O_DRY.LIN, na.rm = TRUE),
    n2o_se      = se(FN2O_DRY.LIN),
    .groups     = "drop"
  ) %>%
  filter(n_obs >= 3)                  # tweak threshold if needed

# CO2
daily_co2 <- co2_raw %>%
  mutate(DOY = floor(DOY)) %>%
  group_by(Pile, DOY) %>%
  summarise(
    n_obs       = sum(!is.na(FCO2_DRY.LIN)),
    co2_mean    = mean(FCO2_DRY.LIN, na.rm = TRUE),
    co2_se      = se(FCO2_DRY.LIN),
    .groups     = "drop"
  ) %>%
  filter(n_obs >= 3)

# CH4
daily_ch4 <- ch4_raw %>%
  mutate(DOY = floor(DOY)) %>%
  group_by(Pile, DOY) %>%
  summarise(
    n_obs       = sum(!is.na(FCH4_DRY.LIN)),
    ch4_mean    = mean(FCH4_DRY.LIN, na.rm = TRUE),
    ch4_se      = se(FCH4_DRY.LIN),
    .groups     = "drop"
  ) %>%
  filter(n_obs >= 3)

weekly_n2o <- n2o_raw %>%
  mutate(
    week = floor((DOY - 1) / 7) + 1
  ) %>%
  group_by(Pile, week) %>%
  summarise(
    n_obs    = sum(!is.na(FN2O_DRY.LIN)),
    n2o_mean = mean(FN2O_DRY.LIN, na.rm = TRUE),
    n2o_se   = sd(FN2O_DRY.LIN, na.rm = TRUE) / sqrt(n_obs),
    DOY      = mean(DOY, na.rm = TRUE),   # x-position for plotting (optional but useful)
    .groups  = "drop"
  ) %>%
  filter(n_obs >= 3)

# CO2
weekly_co2 <- co2_raw %>%
  mutate(
    week = floor((DOY - 1) / 7) + 1
  ) %>%
  group_by(Pile, week) %>%
  summarise(
    n_obs    = sum(!is.na(FCO2_DRY.LIN)),
    co2_mean = mean(FCO2_DRY.LIN, na.rm = TRUE),
    co2_se   = sd(FCO2_DRY.LIN, na.rm = TRUE) / sqrt(n_obs),
    DOY      = mean(DOY, na.rm = TRUE),
    .groups  = "drop"
  ) %>%
  filter(n_obs >= 3)

# CH4
weekly_ch4 <- ch4_raw %>%
  mutate(
    week = floor((DOY - 1) / 7) + 1
  ) %>%
  group_by(Pile, week) %>%
  summarise(
    n_obs    = sum(!is.na(FCH4_DRY.LIN)),
    ch4_mean = mean(FCH4_DRY.LIN, na.rm = TRUE),
    ch4_se   = sd(FCH4_DRY.LIN, na.rm = TRUE) / sqrt(n_obs),
    DOY      = mean(DOY, na.rm = TRUE),
    .groups  = "drop"
  ) %>%
  filter(n_obs >= 3)

Plotting Values Daily and Weekly Plots for all tg Fluxes by pile with turning events

fig_co2_daily <- plot_points_err(
  daily_co2,
  x = DOY, y_mean = co2_mean, y_se = co2_se,
  group = Pile,
  cols = pile_cols, shapes = pile_shapes,
  point_size = 1.4,
  vlines_red = turns_both_red,
  vlines_black = turns_control_black,
  vline_red_linetype = "solid",
  vline_black_linetype = "dashed"
)

fig_co2_daily

fig_co2_weekly <- plot_points_err(
  weekly_co2,
  x = week, y_mean = co2_mean, y_se = co2_se,
  group = Pile,
  cols = pile_cols, shapes = pile_shapes,
  point_size = 1.4,
  vlines_red = turns_both_red_week,
  vlines_black = turns_control_black_week,
  vline_red_linetype = "solid",
  vline_black_linetype = "dashed"
)

fig_co2_weekly

fig_ch4_daily <- plot_points_err(
  daily_ch4,
  x = DOY, y_mean = ch4_mean, y_se = ch4_se,
  group = Pile,
  cols = pile_cols, shapes = pile_shapes,
  point_size = 1.4,
  vlines_red = turns_both_red,
  vlines_black = turns_control_black,
  vline_red_linetype = "solid",
  vline_black_linetype = "dashed"
)

fig_ch4_daily

fig_ch4_weekly <- plot_points_err(
  weekly_ch4,
  x = week, y_mean = ch4_mean, y_se = ch4_se,
  group = Pile,
  cols = pile_cols, shapes = pile_shapes,
  point_size = 1.4,
  vlines_red = turns_both_red_week,
  vlines_black = turns_control_black_week,
  vline_red_linetype = "solid",
  vline_black_linetype = "dashed"
)

fig_ch4_weekly

fig_n2o_daily <- plot_points_err(
  daily_n2o,
  x = DOY, y_mean = n2o_mean, y_se = n2o_se,
  group = Pile,
  cols = pile_cols, shapes = pile_shapes,
  point_size = 1.4,
  vlines_red = turns_both_red,
  vlines_black = turns_control_black,
  vline_red_linetype = "solid",
  vline_black_linetype = "dashed"
)

fig_n2o_daily

fig_n2o_weekly <- plot_points_err(
  weekly_n2o,
  x = week, y_mean = n2o_mean, y_se = n2o_se,
  group = Pile,
  cols = pile_cols, shapes = pile_shapes,
  point_size = 1.4,
  vlines_red = turns_both_red_week,
  vlines_black = turns_control_black_week,
  vline_red_linetype = "solid",
  vline_black_linetype = "dashed"
)

fig_n2o_weekly

Co2 100 year Equivalence Values

Co2 1 Ch4 28 N2o 265

Step 1: using flux data with covars cleaning again but together not individually to only use measurements that happened at the same time.

Flux_All_Gases_QC <- Flux_Data_With_Covars %>%
  filter(
    # CO2
    FCO2_DRY.LIN > 0,
    FCO2_DRY.LIN_R2 > 0.9,

    # CH4
    FCH4_DRY.LIN > 0,
    FCH4_DRY.LIN_R2 > 0.9,

    # N2O
    FN2O_DRY.LIN > 0,
    FN2O_DRY.LIN_R2 > 0.9
  )


Flux_Data_With_Covars %>%
  summarise(
    total_obs = n(),
    pass_all  = sum(
      FCO2_DRY.LIN > 0 & FCO2_DRY.LIN_R2 > 0.9 &
      FCH4_DRY.LIN > 0 & FCH4_DRY.LIN_R2 > 0.9 &
      FN2O_DRY.LIN > 0 & FN2O_DRY.LIN_R2 > 0.9,
      na.rm = TRUE
    ),
    percent_retained = 100 * pass_all / total_obs
  )

## # A tibble: 1 × 3
##   total_obs pass_all percent_retained
##       <int>    <int>            <dbl>
## 1     21090     5558             26.4

Flux_All_Gases_QC <- Flux_All_Gases_QC %>%
  mutate(
    # keep units explicit
    CO2_umol = FCO2_DRY.LIN,                  # µmol m-2 s-1
    N2O_umol = FN2O_DRY.LIN,                  # µmol m-2 s-1
    CH4_umol = FCH4_DRY.LIN * 0.001,          # nmol -> µmol

    # GWP100 CO2-equivalent (µmol CO2-eq m-2 s-1)
    GHG_CO2e_umol = CO2_umol + 28 * CH4_umol + 265 * N2O_umol
  )

Summary Stats of Co2e by pile when looking at measurements where all three gases passed qa/qc

pile_vars_co2e <- Flux_All_Gases_QC %>%
  filter(!is.na(GHG_CO2e_umol)) %>%
  group_by(Pile) %>%
  summarise(
    n_obs = n(),
    mean_CO2e = mean(GHG_CO2e_umol),
    sd_CO2e   = sd(GHG_CO2e_umol),
    se_CO2e = sd_CO2e/sqrt(n_obs)
  )


kable(
  pile_vars_co2e,
  digits = 3,
  caption = "Summary statistics for co2e flux by pile."
) %>%
  kable_styling(
    full_width = FALSE,
    position = "center",
    bootstrap_options = c("bordered", "striped")
  )

Summary statistics for co2e flux by pile.
Pile	n_obs	mean_CO2e	sd_CO2e	se_CO2e
C	3269	59570.66	101672.24	1778.259
E	2289	40046.13	96185.23	2010.414

DOY and Week Plots for measurements that pass cleaning for all TGs GHG potential co2 equivalent

# daily co2e
daily_co2e <- Flux_All_Gases_QC %>%
  mutate(DOY = floor(DOY)) %>%
  group_by(Pile, DOY) %>%
  summarise(
    n_obs       = sum(!is.na(GHG_CO2e_umol)),
    co2e_mean    = mean(GHG_CO2e_umol, na.rm = TRUE),
    co2e_se      = sd(GHG_CO2e_umol, na.rm = TRUE)/sqrt(n_obs),
    .groups     = "drop"
  ) %>%
  filter(n_obs >= 3)


# weekly co2e
weekly_co2e <- Flux_All_Gases_QC %>%
  mutate(
    week = floor((DOY - 1) / 7) + 1
  ) %>%
  group_by(Pile, week) %>%
  summarise(
    n_obs    = sum(!is.na(GHG_CO2e_umol)),
    co2e_mean = mean(GHG_CO2e_umol, na.rm = TRUE),
    co2e_se   = sd(GHG_CO2e_umol, na.rm = TRUE) / sqrt(n_obs),
    DOY      = mean(DOY, na.rm = TRUE),
    .groups  = "drop"
  ) %>%
  filter(n_obs >= 3)
# CO2


fig__co2e_daily <- plot_points_err(
  daily_co2e,
  x = DOY, y_mean = co2e_mean, y_se = co2e_se,
  group = Pile,
  cols = pile_cols, shapes = pile_shapes,
  point_size = 1.4,
  vlines_red = turns_both_red,
  vlines_black = turns_control_black,
  vline_red_linetype = "solid",
  vline_black_linetype = "dashed"
)

fig__co2e_daily

fig_co2e_weekly <- plot_points_err(
  weekly_co2e,
  x = week, y_mean =co2e_mean, y_se = co2e_se,
  group = Pile,
  cols = pile_cols, shapes = pile_shapes,
  point_size = 1.4,
  vlines_red = turns_both_red_week,
  vlines_black = turns_control_black_week,
  vline_red_linetype = "solid",
  vline_black_linetype = "dashed"
)

fig_co2e_weekly

Average ghg per day (individual per gas NOT measurements that all tgs pass cleaning)

daily_all3 <- daily_co2 %>%
  select(Pile, DOY, n_obs_co2 = n_obs, co2_mean, co2_se) %>%
  inner_join(
    daily_ch4 %>% select(Pile, DOY, n_obs_ch4 = n_obs, ch4_mean, ch4_se),
    by = c("Pile", "DOY")
  ) %>%
  inner_join(
    daily_n2o %>% select(Pile, DOY, n_obs_n2o = n_obs, n2o_mean, n2o_se),
    by = c("Pile", "DOY")
  )

daily_all3 <- daily_all3 %>%
  mutate(
    ch4_mean_umol = ch4_mean * 0.001,
    ch4_se_umol   = ch4_se   * 0.001,

    # CO2-equivalent in µmol CO2-eq m-2 s-1 (using your factors)
    co2e_mean = co2_mean + 28 * ch4_mean_umol + 273 * n2o_mean
  )

# not going to error bars for this as it is messy but for now we assume they are independent so each one is one measurement and not correlated which is not true: 

daily_all3 <- daily_all3 %>%
  mutate(
    co2e_se = sqrt(
      co2_se^2 +
      (28 * ch4_se_umol)^2 +
      (273 * n2o_se)^2
    )
  )

fig_co2e_daily_all3 <- plot_points_err(
  daily_all3,
  x = DOY,
  y_mean = co2e_mean,
  y_se = co2e_se,
  group = Pile,
  cols = pile_cols,
  shapes = pile_shapes,
  point_size = 1.6,
  vlines_red = turns_both_red,
  vlines_black = turns_control_black,
  vline_red_linetype = "solid",
  vline_black_linetype = "dashed",
  x_lab = "Day of year",
  y_lab = expression("CO"[2]*"-eq flux ("*mu*"mol m"^-2*" s"^-1*")"),
  legend_title = "Pile"
)

fig_co2e_daily_all3

This is good, I don’t think I need to do a weekly version. Depending on what we want to do we can revisit.

Now we have looked at the differences in each trace gas and all of them together as well as the baseline differences between the piles (temp, moisture). We did not plot other variables like bulk density but the limited number of samples makes this a little difficult. - Not sure how we should include this, in other parts I have this as just a mean , sd , se.

Variable Unit Value SD SE n Bulk Density C g/cm^3 0.17 3.58 1.03 12 Bulk Density E g/cm^3 0.20 5.17 1.49 12

Basic Model Building:

Numerous models have been employed throughout the process of writing this paper: Here I will present all the models, explain them, and then we can choose the model which best explains the data.

Model 1: Linear Mixed Effects Model:

Here we are looking at the magnitude of fluxes between piles taking into account random effects from chambers and an auto correlation structure.

Creating LME for each of the trace gases and co2 equivalent: with auto correlation and variance structure.

# Log transforming co2 data: 

# log transforming flux data for each flux, grouping chambers together  and arranging by doy
dat_co2 <- co2_clean %>% 
  mutate(log_flux = log(FCO2_DRY.LIN))%>%  
  arrange(Chamber_Corrected, DOY) %>%
  group_by(Chamber_Corrected) %>%
  mutate(t_index = row_number()) %>%
  ungroup()

# applying model with random effects and autocorrelation

m_co2 <- lme(
  log_flux ~ Pile,
  random = ~ 1 | Chamber_Corrected,
  correlation = corAR1(form = ~ t_index | Chamber_Corrected),
  weights = varIdent(form = ~ 1 | Pile),
  data = dat_co2,
  method = "REML"
)


# log transforming flux data for each flux, grouping chambers together  and arranging by doy
dat_ch4 <- ch4_clean %>% 
  mutate(log_flux = log(FCH4_DRY.LIN))%>%  
  arrange(Chamber_Corrected, DOY) %>%
  group_by(Chamber_Corrected) %>%
  mutate(t_index = row_number()) %>%
  ungroup()

# applying model 

m_ch4 <- lme(
  log_flux ~ Pile,
  random = ~ 1 | Chamber_Corrected,
  correlation = corAR1(form = ~ t_index | Chamber_Corrected),
  weights = varIdent(form = ~ 1 | Pile),
  data = dat_ch4,
  method = "REML"
)

# log transforming flux data for each flux, grouping chambers together  and arranging by doy
dat_n2o <- N2O_clean %>% 
  mutate(log_flux = log(FN2O_DRY.LIN))%>%  
  arrange(Chamber_Corrected, DOY) %>%
  group_by(Chamber_Corrected) %>%
  mutate(t_index = row_number()) %>%
  ungroup()

# applying model 

m_n2o <- lme(
  log_flux ~ Pile,
  random = ~ 1 | Chamber_Corrected,
  correlation = corAR1(form = ~ t_index | Chamber_Corrected),
  weights = varIdent(form = ~ 1 | Pile),
  data = dat_n2o,
  method = "REML"
)


dat_co2e <- Flux_All_Gases_QC %>% 
  mutate(log_flux = log(GHG_CO2e_umol))%>%  
  arrange(Chamber_Corrected, DOY) %>%
  group_by(Chamber_Corrected) %>%
  mutate(t_index = row_number()) %>%
  ungroup()

# applying model 

m_co2e <- lme(
  log_flux ~ Pile,
  random = ~ 1 | Chamber_Corrected,
  correlation = corAR1(form = ~ t_index | Chamber_Corrected),
  weights = varIdent(form = ~ 1 | Pile),
  data = dat_co2e,
  method = "REML"
)


summary(m_co2)$tTable

##                   Value Std.Error   DF     t-value      p-value
## (Intercept)  4.86578656 0.2516751 9116 19.33360059 1.191418e-81
## PileE       -0.01405495 0.3589091    4 -0.03916019 9.706392e-01

summary(m_ch4)$tTable

##                  Value Std.Error   DF    t-value      p-value
## (Intercept)  6.9920703 0.7160137 8999  9.7652744 2.049989e-22
## PileE       -0.5749777 1.0402128    4 -0.5527501 6.098692e-01

summary(m_n2o)$tTable

##                  Value Std.Error    DF  t-value      p-value
## (Intercept)  4.0477276 0.2135637 13810 18.95326 4.177942e-79
## PileE       -0.4672754 0.3152133     4 -1.48241 2.123687e-01

summary(m_co2e)$tTable

##                  Value Std.Error   DF    t-value       p-value
## (Intercept)  9.9295460 0.3194308 5552 31.0851212 1.003075e-195
## PileE       -0.3951411 0.4905084    4 -0.8055747  4.656491e-01

extract_pile_effect <- function(model, gas_name) {
  tt <- summary(model)$tTable
  
  beta <- tt["PileE", "Value"]
  se   <- tt["PileE", "Std.Error"]
  pval <- tt["PileE", "p-value"]
  
  data.frame(
    Gas = gas_name,
    Ratio_E_over_C = exp(beta),
    CI_low  = exp(beta - 1.96 * se),
    CI_high = exp(beta + 1.96 * se),
    p_value = pval
  )
}

results_lme <- bind_rows(
  extract_pile_effect(m_co2,  "CO2"),
  extract_pile_effect(m_ch4,  "CH4"),
  extract_pile_effect(m_n2o,  "N2O"),
  extract_pile_effect(m_co2e, "CO2e")
)

results_lme

##    Gas Ratio_E_over_C     CI_low  CI_high   p_value
## 1  CO2      0.9860434 0.48796251 1.992533 0.9706392
## 2  CH4      0.5627174 0.07325602 4.322523 0.6098692
## 3  N2O      0.6267075 0.33787022 1.162465 0.2123687
## 4 CO2e      0.6735850 0.25755097 1.761658 0.4656491

knitr::kable(
  results_lme,
  digits = 3,
  caption = "Pile effect estimates from log-scale linear mixed-effects models."
)

Pile effect estimates from log-scale linear mixed-effects models.
Gas	Ratio_E_over_C	CI_low	CI_high	p_value
CO2	0.986	0.488	1.993	0.971
CH4	0.563	0.073	4.323	0.610
N2O	0.627	0.338	1.162	0.212
CO2e	0.674	0.258	1.762	0.466

adding chamber elevation to the model as this impacts flux magnitude and does not have to do with mechanism or treatment:

dat_co2 <- dat_co2 %>%
  group_by(Chamber_Corrected) %>%
  mutate(
    Chamber_Elevation_filled = ifelse(
      is.na(Chamber_Elevation),
      median(Chamber_Elevation, na.rm = TRUE),  # or mean(...)
      Chamber_Elevation
    )
  ) %>%
  ungroup()

dat_co2 <- dat_co2 %>%
  mutate(Chamber_Elevation_sc = as.numeric(scale(Chamber_Elevation_filled)))

m_co2_elev <- lme(
  log_flux ~ Pile + Chamber_Elevation_sc,
  random = ~ 1 | Chamber_Corrected,
  correlation = corAR1(form = ~ t_index | Chamber_Corrected),
  weights = varIdent(form = ~ 1 | Pile),
  data = dat_co2,
  method = "REML",
  na.action = na.omit
)

dat_ch4 <- dat_ch4 %>%
  group_by(Chamber_Corrected) %>%
  mutate(
    Chamber_Elevation_filled = ifelse(
      is.na(Chamber_Elevation),
      median(Chamber_Elevation, na.rm = TRUE),  # or mean(...)
      Chamber_Elevation
    )
  ) %>%
  ungroup()

dat_ch4 <- dat_ch4 %>%
  mutate(Chamber_Elevation_sc = as.numeric(scale(Chamber_Elevation_filled)))

m_ch4_elev <- lme(
  log_flux ~ Pile + Chamber_Elevation_sc,
  random = ~ 1 | Chamber_Corrected,
  correlation = corAR1(form = ~ t_index | Chamber_Corrected),
  weights = varIdent(form = ~ 1 | Pile),
  data = dat_ch4,
  method = "REML",
  na.action = na.omit
)

dat_n2o <- dat_n2o %>%
  group_by(Chamber_Corrected) %>%
  mutate(
    Chamber_Elevation_filled = ifelse(
      is.na(Chamber_Elevation),
      median(Chamber_Elevation, na.rm = TRUE),  # or mean(...)
      Chamber_Elevation
    )
  ) %>%
  ungroup()

dat_n2o <- dat_n2o %>%
  mutate(Chamber_Elevation_sc = as.numeric(scale(Chamber_Elevation_filled)))

m_n2o_elev <- lme(
  log_flux ~ Pile + Chamber_Elevation_sc,
  random = ~ 1 | Chamber_Corrected,
  correlation = corAR1(form = ~ t_index | Chamber_Corrected),
  weights = varIdent(form = ~ 1 | Pile),
  data = dat_n2o,
  method = "REML",
  na.action = na.omit
)

dat_co2e <- dat_co2e %>%
  group_by(Chamber_Corrected) %>%
  mutate(
    Chamber_Elevation_filled = ifelse(
      is.na(Chamber_Elevation),
      median(Chamber_Elevation, na.rm = TRUE),  # or mean(...)
      Chamber_Elevation
    )
  ) %>%
  ungroup()

dat_co2e <- dat_co2e %>%
  mutate(Chamber_Elevation_sc = as.numeric(scale(Chamber_Elevation_filled)))

m_co2e_elev <- lme(
  log_flux ~ Pile + Chamber_Elevation_sc,
  random = ~ 1 | Chamber_Corrected,
  correlation = corAR1(form = ~ t_index | Chamber_Corrected),
  weights = varIdent(form = ~ 1 | Pile),
  data = dat_co2e,
  method = "REML",
  na.action = na.omit
)


results_lme_elv <- bind_rows(
  extract_pile_effect(m_co2e_elev,  "CO2"),
  extract_pile_effect(m_ch4_elev,  "CH4"),
  extract_pile_effect(m_n2o_elev,  "N2O"),
  extract_pile_effect(m_co2e_elev, "CO2e")
)

results_lme_elv

##    Gas Ratio_E_over_C     CI_low  CI_high   p_value
## 1  CO2      0.6089703 0.20520084 1.807229 0.4220016
## 2  CH4      0.5512935 0.05486585 5.539411 0.6395743
## 3  N2O      0.6640129 0.32085816 1.374168 0.3317564
## 4 CO2e      0.6089703 0.20520084 1.807229 0.4220016

Adding temperature to the LME_Elevation_Included - Looking at interaction effect of temperature - emissions are more effected by temperature in the control vs experimental pile which to me makes sense but there are alot of factors - such as different ranges in each pile for temperature. Overall this goes with what we have seen.

Using ML to look at interactions:

Temperature values ≤ 0°C were removed and remaining missing values replaced with the column mean, then z-score standardised. A linear mixed effects model was fitted with log CO₂ flux as the response, including pile treatment, scaled temperature, their interaction, and chamber elevation as fixed effects.

Chamber identity was included as a random intercept, with AR(1) autocorrelation within chambers and pile-level variance heterogeneity accounted for.

The interaction term specifically tests whether temperature drives flux differently between the two piles.

Model fit was formally compared against the elevation-only baseline using a likelihood ratio test (models refitted with ML for comparison).

Predicted flux trajectories were generated across the full temperature range at the population level, holding elevation at its mean, and back-transformed from log scale for visualization.The final plot overlays raw observations with model predictions for each pile, allowing direct visual comparison of the temperature-flux relationship between treatment groups.

dat_co2_int <- dat_co2 %>%
  mutate(Pile = factor(Pile, levels = c("C", "E"))) %>%
  # remove invalid temperature values
  mutate(
    TS_1.initial_value = ifelse(
      TS_1.initial_value <= 0,
      NA,
      TS_1.initial_value
    )
  ) %>%
  
  # fill missing values with the mean
  mutate(
    TS_1_filled = ifelse(
      is.na(TS_1.initial_value),
      mean(TS_1.initial_value, na.rm = TRUE),
      TS_1.initial_value
    )
  ) %>%
  
  # scale AFTER filling
  mutate(
    pile_temp_sc = as.numeric(scale(TS_1_filled))
  )

m_co2_temp_int <- lme(
  log_flux ~ Pile * pile_temp_sc + Chamber_Elevation_sc,
  random = ~ 1 | Chamber_Corrected,
  correlation = corAR1(form = ~ t_index | Chamber_Corrected),
  weights = varIdent(form = ~ 1 | Pile),
  data = dat_co2_int,
  method = "REML",
  na.action = na.omit
)

summary(m_co2_temp_int)$tTable

##                             Value  Std.Error   DF     t-value      p-value
## (Intercept)           4.874121106 0.28817080 9113 16.91400094 3.249538e-63
## PileE                -0.004580671 0.40909527    4 -0.01119708 9.916024e-01
## pile_temp_sc          0.402285365 0.02836424 9113 14.18283578 3.532553e-45
## Chamber_Elevation_sc  0.098878046 0.02206649 9113  4.48091457 7.523048e-06
## PileE:pile_temp_sc   -0.095941261 0.04377622 9113 -2.19162953 2.843149e-02

m_base_ml <- update(m_co2_elev, method = "ML")
m_int_ml <- update(m_co2_temp_int, method = "ML")
anova(m_base_ml  , m_int_ml)

##           Model df      AIC      BIC    logLik   Test  L.Ratio p-value
## m_base_ml     1  7 4863.047 4912.876 -2424.524                        
## m_int_ml      2  9 4623.972 4688.038 -2302.986 1 vs 2 243.0756  <.0001

# temperature range (scaled)
temp_seq <- seq(
  min(dat_co2_int$pile_temp_sc),
  max(dat_co2_int$pile_temp_sc),
  length.out = 100
)

newdat <- expand.grid(
  pile_temp_sc = temp_seq,
  Pile = levels(dat_co2_int$Pile)
)

# hold elevation at its mean (0 after scaling)
newdat$Chamber_Elevation_sc <- 0

# predict population-level (fixed effects only)
newdat$pred_log <- predict(m_co2_temp_int, newdata = newdat, level = 0)
newdat$pred_flux <- exp(newdat$pred_log)

ggplot(dat_co2_int,
       aes(x = pile_temp_sc,
           y = exp(log_flux),
           colour = Pile)) +

  geom_point(alpha = 0.15, size = 1) +

  geom_line(data = newdat,
            aes(x = pile_temp_sc,
                y = pred_flux,
                colour = Pile),
            linewidth = 1.2) +

  theme_bw(base_size = 12) +
  theme(panel.grid.minor = element_blank()) +

  labs(
    x = "Pile temperature (scaled)",
    y = expression(CO[2]~flux~(back~transformed)),
    colour = "Pile"
  )

Looking at ch4 response to temperature by pile: using the same methods as the above chunk

dat_ch4_int <- dat_ch4 %>%
  mutate(Pile = factor(Pile, levels = c("C", "E"))) %>%
  # remove invalid temperature values
  mutate(
    TS_1.initial_value = ifelse(
      TS_1.initial_value <= 0,
      NA,
      TS_1.initial_value
    )
  ) %>%
  
  # fill missing values with the mean
  mutate(
    TS_1_filled = ifelse(
      is.na(TS_1.initial_value),
      mean(TS_1.initial_value, na.rm = TRUE),
      TS_1.initial_value
    )
  ) %>%
  
  # scale AFTER filling
  mutate(
    pile_temp_sc = as.numeric(scale(TS_1_filled))
  )

m_ch4_temp_int <- lme(
  log_flux ~ Pile * pile_temp_sc + Chamber_Elevation_sc,
  random = ~ 1 | Chamber_Corrected,
  correlation = corAR1(form = ~ t_index | Chamber_Corrected),
  weights = varIdent(form = ~ 1 | Pile),
  data = dat_ch4_int,
  method = "REML",
  na.action = na.omit
)

summary(m_ch4_temp_int)$tTable

##                            Value  Std.Error   DF    t-value      p-value
## (Intercept)           6.98706420 0.79567949 8996  8.7812546 1.913390e-18
## PileE                -0.56873603 1.14602281    4 -0.4962694 6.457371e-01
## pile_temp_sc         -0.01422908 0.03785767 8996 -0.3758573 7.070319e-01
## Chamber_Elevation_sc  0.25677267 0.04009886 8996  6.4034905 1.594647e-10
## PileE:pile_temp_sc    0.37877092 0.07342254 8996  5.1587829 2.538605e-07

m_base_ml <- update(m_ch4_elev, method = "ML")
m_int_ml <- update(m_ch4_temp_int, method = "ML")
anova(m_base_ml  , m_int_ml)

##           Model df       AIC      BIC    logLik   Test  L.Ratio p-value
## m_base_ml     1  7 10002.237 10051.98 -4994.119                        
## m_int_ml      2  9  9974.029 10037.98 -4978.015 1 vs 2 32.20793  <.0001

# temperature range (scaled)
temp_seq <- seq(
  min(dat_ch4_int$pile_temp_sc),
  max(dat_ch4_int$pile_temp_sc),
  length.out = 100
)

newdat <- expand.grid(
  pile_temp_sc = temp_seq,
  Pile = levels(dat_ch4_int$Pile)
)

# hold elevation at its mean (0 after scaling)
newdat$Chamber_Elevation_sc <- 0

# predict population-level (fixed effects only)
newdat$pred_log <- predict(m_ch4_temp_int, newdata = newdat, level = 0)
newdat$pred_flux <- exp(newdat$pred_log)

ggplot(dat_ch4_int,
       aes(x = pile_temp_sc,
           y = exp(log_flux),
           colour = Pile)) +

  geom_point(alpha = 0.15, size = 1) +

  geom_line(data = newdat,
            aes(x = pile_temp_sc,
                y = pred_flux,
                colour = Pile),
            linewidth = 1.2) +

  theme_bw(base_size = 12) +
  theme(panel.grid.minor = element_blank()) +

  labs(
    x = "Pile temperature (scaled)",
    y = expression(Ch[4]~flux~(back~transformed)),
    colour = "Pile"
  )

For both ch4 and co2 emissions the control pile flux respond more to increases in temperatures: It must be noted that is the not the same scale in terms of temps (as the control pile had a higher peak in temp - but it is similar)

Looking at N2o response to temperature by pile using the same methods as the above two piles:

dat_n2o_int <- dat_n2o %>%
  mutate(Pile = factor(Pile, levels = c("C", "E"))) %>%
  # remove invalid temperature values
  mutate(
    TS_1.initial_value = ifelse(
      TS_1.initial_value <= 0,
      NA,
      TS_1.initial_value
    )
  ) %>%
  
  # fill missing values with the mean
  mutate(
    TS_1_filled = ifelse(
      is.na(TS_1.initial_value),
      mean(TS_1.initial_value, na.rm = TRUE),
      TS_1.initial_value
    )
  ) %>%
  
  # scale AFTER filling
  mutate(
    pile_temp_sc = as.numeric(scale(TS_1_filled))
  )

m_n2o_temp_int <- lme(
  log_flux ~ Pile * pile_temp_sc + Chamber_Elevation_sc,
  random = ~ 1 | Chamber_Corrected,
  correlation = corAR1(form = ~ t_index | Chamber_Corrected),
  weights = varIdent(form = ~ 1 | Pile),
  data = dat_n2o_int,
  method = "REML",
  na.action = na.omit
)

summary(m_n2o_temp_int)$tTable

##                            Value  Std.Error    DF    t-value      p-value
## (Intercept)           4.03288917 0.25383263 13807 15.8879854 2.421479e-56
## PileE                -0.41736205 0.37020373     4 -1.1273848 3.226284e-01
## pile_temp_sc         -0.01430343 0.03251108 13807 -0.4399557 6.599760e-01
## Chamber_Elevation_sc  0.25641189 0.03787699 13807  6.7695953 1.343577e-11
## PileE:pile_temp_sc   -0.02934608 0.05247690 13807 -0.5592191 5.760213e-01

m_base_ml <- update(m_n2o_elev, method = "ML")
m_int_ml <- update(m_n2o_temp_int, method = "ML")
anova(m_base_ml  , m_int_ml)

##           Model df      AIC      BIC    logLik   Test  L.Ratio p-value
## m_base_ml     1  7 10628.95 10681.69 -5307.477                        
## m_int_ml      2  9 10631.65 10699.45 -5306.823 1 vs 2 1.309517  0.5196

# temperature range (scaled)
temp_seq <- seq(
  min(dat_n2o_int$pile_temp_sc),
  max(dat_n2o_int$pile_temp_sc),
  length.out = 100
)

newdat <- expand.grid(
  pile_temp_sc = temp_seq,
  Pile = levels(dat_n2o_int$Pile)
)

# hold elevation at its mean (0 after scaling)
newdat$Chamber_Elevation_sc <- 0

# predict population-level (fixed effects only)
newdat$pred_log <- predict(m_n2o_temp_int, newdata = newdat, level = 0)
newdat$pred_flux <- exp(newdat$pred_log)

ggplot(dat_n2o_int,
       aes(x = pile_temp_sc,
           y = exp(log_flux),
           colour = Pile)) +

  geom_point(alpha = 0.15, size = 1) +

  geom_line(data = newdat,
            aes(x = pile_temp_sc,
                y = pred_flux,
                colour = Pile),
            linewidth = 1.2) +

  theme_bw(base_size = 12) +
  theme(panel.grid.minor = element_blank()) +

  labs(
    x = "Pile temperature (scaled)",
    y = expression(n2o~flux~(back~transformed)),
    colour = "Pile"
  )

Less of an effect of temperature for N2o - not significant whereas ch4 and co2 whereas

Co2e response to temperature following the same procedure as for each tg

dat_co2e_int <- dat_co2e %>%
  mutate(Pile = factor(Pile, levels = c("C", "E"))) %>%
  # remove invalid temperature values
  mutate(
    TS_1.initial_value = ifelse(
      TS_1.initial_value <= 0,
      NA,
      TS_1.initial_value
    )
  ) %>%
  
  # fill missing values with the mean
  mutate(
    TS_1_filled = ifelse(
      is.na(TS_1.initial_value),
      mean(TS_1.initial_value, na.rm = TRUE),
      TS_1.initial_value
    )
  ) %>%
  
  # scale AFTER filling
  mutate(
    pile_temp_sc = as.numeric(scale(TS_1_filled))
  )

m_co2e_temp_int <- lme(
  log_flux ~ Pile * pile_temp_sc + Chamber_Elevation_sc,
  random = ~ 1 | Chamber_Corrected,
  correlation = corAR1(form = ~ t_index | Chamber_Corrected),
  weights = varIdent(form = ~ 1 | Pile),
  data = dat_co2e_int,
  method = "REML",
  na.action = na.omit
)

summary(m_co2e_temp_int)$tTable

##                           Value  Std.Error   DF    t-value       p-value
## (Intercept)           9.9584957 0.41351778 5549 24.0823880 5.753037e-122
## PileE                -0.5311861 0.61399085    4 -0.8651369  4.357648e-01
## pile_temp_sc          0.1202260 0.04005672 5549  3.0013927  2.699437e-03
## Chamber_Elevation_sc  0.2199742 0.03335744 5549  6.5944558  4.663712e-11
## PileE:pile_temp_sc   -0.3739876 0.06384617 5549 -5.8576365  4.964783e-09

m_base_ml <- update(m_co2e_elev, method = "ML")
m_int_ml <- update(m_co2e_temp_int, method = "ML")
anova(m_base_ml  , m_int_ml)

##           Model df      AIC      BIC    logLik   Test  L.Ratio p-value
## m_base_ml     1  7 4827.596 4873.957 -2406.798                        
## m_int_ml      2  9 4797.290 4856.897 -2389.645 1 vs 2 34.30643  <.0001

# temperature range (scaled)
temp_seq <- seq(
  min(dat_co2e_int$pile_temp_sc),
  max(dat_co2e_int$pile_temp_sc),
  length.out = 100
)

newdat <- expand.grid(
  pile_temp_sc = temp_seq,
  Pile = levels(dat_co2e_int$Pile)
)

# hold elevation at its mean (0 after scaling)
newdat$Chamber_Elevation_sc <- 0

# predict population-level (fixed effects only)
newdat$pred_log <- predict(m_co2e_temp_int, newdata = newdat, level = 0)
newdat$pred_flux <- exp(newdat$pred_log)

ggplot(dat_co2e_int,
       aes(x = pile_temp_sc,
           y = exp(log_flux),
           colour = Pile)) +

  geom_point(alpha = 0.15, size = 1) +

  geom_line(data = newdat,
            aes(x = pile_temp_sc,
                y = pred_flux,
                colour = Pile),
            linewidth = 1.2) +

  theme_bw(base_size = 12) +
  theme(panel.grid.minor = element_blank()) +

  labs(
    x = "Pile temperature (scaled)",
    y = expression(Co2e~flux~(back~transformed)),
    colour = "Pile"
  )

Similiar result to co2 and ch4 as they dominate: temperature is a factor as in those models

Adding Moisture data: using same procedure:

# ---- 1) Build modeling dataframe (CO2 + moisture) ----
dat_co2_moist_int <- dat_co2 %>%
  mutate(
    Pile = factor(Pile, levels = c("C", "E")),
    
    # moisture raw (your chosen column)
    moist_raw = SWC_1.initial_value,
    
    # OPTIONAL: screen impossible moisture values (adjust if needed)
    moist_raw = ifelse(moist_raw < 0.01 | moist_raw > 0.90, NA, moist_raw),
    
    # mean-fill, then scale
    moist_filled = ifelse(
      is.na(moist_raw),
      mean(moist_raw, na.rm = TRUE),
      moist_raw
    ),
    pile_moist_sc = as.numeric(scale(moist_filled))
  )

# sanity checks
stopifnot(nrow(dat_co2_moist_int) > 0)
stopifnot(all(!is.na(dat_co2_moist_int$pile_moist_sc)))
stopifnot(!is.null(levels(dat_co2_moist_int$Pile)))

# ---- 2) Fit moisture interaction model (REML) ----
m_co2_moist_int <- lme(
  log_flux ~ Pile * pile_moist_sc + Chamber_Elevation_sc,
  random = ~ 1 | Chamber_Corrected,
  correlation = corAR1(form = ~ t_index | Chamber_Corrected),
  weights = varIdent(form = ~ 1 | Pile),
  data = dat_co2_moist_int,
  method = "REML",
  na.action = na.omit
)

summary(m_co2_moist_int)$tTable

##                              Value  Std.Error   DF     t-value      p-value
## (Intercept)           4.8817454565 0.27215793 9113 17.93717862 9.861628e-71
## PileE                -0.0480349778 0.38774639    4 -0.12388246 9.073840e-01
## pile_moist_sc        -0.0097671707 0.01026659 9113 -0.95135523 3.414493e-01
## Chamber_Elevation_sc  0.0752108093 0.02330188 9113  3.22767092 1.252440e-03
## PileE:pile_moist_sc  -0.0004864577 0.02385422 9113 -0.02039294 9.837304e-01

# ---- 3) ML refits for valid model comparison ----
m_co2_elev_ml <- lme(
  log_flux ~ Pile + Chamber_Elevation_sc,
  random = ~ 1 | Chamber_Corrected,
  correlation = corAR1(form = ~ t_index | Chamber_Corrected),
  weights = varIdent(form = ~ 1 | Pile),
  data = dat_co2_moist_int,   # IMPORTANT: same data object
  method = "ML",
  na.action = na.omit
)

m_co2_moist_int_ml <- lme(
  log_flux ~ Pile * pile_moist_sc + Chamber_Elevation_sc,
  random = ~ 1 | Chamber_Corrected,
  correlation = corAR1(form = ~ t_index | Chamber_Corrected),
  weights = varIdent(form = ~ 1 | Pile),
  data = dat_co2_moist_int,   # same data object
  method = "ML",
  na.action = na.omit
)

anova(m_co2_elev_ml, m_co2_moist_int_ml)

##                    Model df      AIC      BIC    logLik   Test  L.Ratio p-value
## m_co2_elev_ml          1  7 4863.047 4912.876 -2424.524                        
## m_co2_moist_int_ml     2  9 4865.954 4930.020 -2423.977 1 vs 2 1.093577  0.5788

# ---- 4) Prediction grid + plot ----
moist_seq <- seq(
  min(dat_co2_moist_int$pile_moist_sc, na.rm = TRUE),
  max(dat_co2_moist_int$pile_moist_sc, na.rm = TRUE),
  length.out = 100
)

newdat <- expand.grid(
  pile_moist_sc = moist_seq,
  Pile = levels(dat_co2_moist_int$Pile)
)

# hold elevation at mean (0 after scaling)
newdat$Chamber_Elevation_sc <- 0

# predict fixed effects only
newdat$pred_log  <- predict(m_co2_moist_int, newdata = newdat, level = 0)
newdat$pred_flux <- exp(newdat$pred_log)

ggplot(dat_co2_moist_int,
       aes(x = pile_moist_sc, y = exp(log_flux), colour = Pile)) +
  geom_point(alpha = 0.15, size = 1) +
  geom_line(data = newdat, aes(y = pred_flux), linewidth = 1.2) +
  theme_bw(base_size = 12) +
  theme(panel.grid.minor = element_blank()) +
  labs(
    x = "Pile moisture (scaled)",
    y = expression(CO[2]~flux~(back~transformed)),
    colour = "Pile"
  )

Adding moisture does not improve the models co2 flux results

Looking and adding moisture data to ch4 results

dat_ch4_moist_int <- dat_ch4 %>%
  mutate(
    Pile = factor(Pile, levels = c("C", "E")),
    
    # moisture raw (your chosen column)
    moist_raw = SWC_1.initial_value,
    
    # OPTIONAL: screen impossible moisture values (adjust if needed)
    moist_raw = ifelse(moist_raw < 0.01 | moist_raw > 0.90, NA, moist_raw),
    
    # mean-fill, then scale
    moist_filled = ifelse(
      is.na(moist_raw),
      mean(moist_raw, na.rm = TRUE),
      moist_raw
    ),
    pile_moist_sc = as.numeric(scale(moist_filled))
  )

# sanity checks
stopifnot(nrow(dat_ch4_moist_int) > 0)
stopifnot(all(!is.na(dat_ch4_moist_int$pile_moist_sc)))
stopifnot(!is.null(levels(dat_ch4_moist_int$Pile)))

# ---- 2) Fit moisture interaction model (REML) ----
m_ch4_moist_int <- lme(
  log_flux ~ Pile * pile_moist_sc + Chamber_Elevation_sc,
  random = ~ 1 | Chamber_Corrected,
  correlation = corAR1(form = ~ t_index | Chamber_Corrected),
  weights = varIdent(form = ~ 1 | Pile),
  data = dat_ch4_moist_int,
  method = "REML",
  na.action = na.omit
)

summary(m_ch4_moist_int)$tTable

##                             Value  Std.Error   DF    t-value      p-value
## (Intercept)           6.989573728 0.80993317 8996  8.6298153 7.192522e-18
## PileE                -0.611832264 1.17071403    4 -0.5226146 6.288521e-01
## pile_moist_sc         0.028371806 0.01294715 8996  2.1913551 2.845166e-02
## Chamber_Elevation_sc  0.251379736 0.04004919 8996  6.2767742 3.616420e-10
## PileE:pile_moist_sc   0.008994654 0.02959614 8996  0.3039130 7.612012e-01

# ---- 3) ML refits for valid model comparison ----
m_ch4_elev_ml <- lme(
  log_flux ~ Pile + Chamber_Elevation_sc,
  random = ~ 1 | Chamber_Corrected,
  correlation = corAR1(form = ~ t_index | Chamber_Corrected),
  weights = varIdent(form = ~ 1 | Pile),
  data = dat_ch4_moist_int,   # IMPORTANT: same data object
  method = "ML",
  na.action = na.omit
)

m_ch4_moist_int_ml <- lme(
  log_flux ~ Pile * pile_moist_sc + Chamber_Elevation_sc,
  random = ~ 1 | Chamber_Corrected,
  correlation = corAR1(form = ~ t_index | Chamber_Corrected),
  weights = varIdent(form = ~ 1 | Pile),
  data = dat_ch4_moist_int,   # same data object
  method = "ML",
  na.action = na.omit
)

anova(m_ch4_elev_ml, m_ch4_moist_int_ml)

##                    Model df       AIC      BIC    logLik   Test  L.Ratio
## m_ch4_elev_ml          1  7 10002.237 10051.98 -4994.119                
## m_ch4_moist_int_ml     2  9  9999.443 10063.39 -4990.721 1 vs 2 6.794511
##                    p-value
## m_ch4_elev_ml             
## m_ch4_moist_int_ml  0.0335

# ---- 4) Prediction grid + plot ----
moist_seq <- seq(
  min(dat_ch4_moist_int$pile_moist_sc, na.rm = TRUE),
  max(dat_ch4_moist_int$pile_moist_sc, na.rm = TRUE),
  length.out = 100
)

newdat <- expand.grid(
  pile_moist_sc = moist_seq,
  Pile = levels(dat_ch4_moist_int$Pile)
)

# hold elevation at mean (0 after scaling)
newdat$Chamber_Elevation_sc <- 0

# predict fixed effects only
newdat$pred_log  <- predict(m_ch4_moist_int, newdata = newdat, level = 0)
newdat$pred_flux <- exp(newdat$pred_log)

ggplot(dat_ch4_moist_int,
       aes(x = pile_moist_sc, y = exp(log_flux), colour = Pile)) +
  geom_point(alpha = 0.15, size = 1) +
  geom_line(data = newdat, aes(y = pred_flux), linewidth = 1.2) +
  theme_bw(base_size = 12) +
  theme(panel.grid.minor = element_blank()) +
  labs(
    x = "Pile moisture (scaled)",
    y = expression(CH[4]~flux~(back~transformed)),
    colour = "Pile"
  )

Moisture improves ch4 model - not co2 which is not unexpected but you would think it would be related. There is not an interaction effect between pile and moisture that is significant

Looking and adding moisture data to n2o results - moisture impacts n2o flux but not differently by pile

dat_n2o_moist_int <- dat_n2o %>%
  mutate(
    Pile = factor(Pile, levels = c("C", "E")),
    
    # moisture raw (your chosen column)
    moist_raw = SWC_1.initial_value,
    
    # OPTIONAL: screen impossible moisture values (adjust if needed)
    moist_raw = ifelse(moist_raw < 0.01 | moist_raw > 0.9, NA, moist_raw),
    
    # mean-fill, then scale
    moist_filled = ifelse(
      is.na(moist_raw),
      mean(moist_raw, na.rm = TRUE),
      moist_raw
    ),
    pile_moist_sc = as.numeric(scale(moist_filled))
  )

# sanity checks
stopifnot(nrow(dat_n2o_moist_int) > 0)
stopifnot(all(!is.na(dat_n2o_moist_int$pile_moist_sc)))
stopifnot(!is.null(levels(dat_n2o_moist_int$Pile)))

# ---- 2) Fit moisture interaction model (REML) ----
m_n2o_moist_int <- lme(
  log_flux ~ Pile * pile_moist_sc + Chamber_Elevation_sc,
  random = ~ 1 | Chamber_Corrected,
  correlation = corAR1(form = ~ t_index | Chamber_Corrected),
  weights = varIdent(form = ~ 1 | Pile),
  data = dat_n2o_moist_int,
  method = "REML",
  na.action = na.omit
)

summary(m_n2o_moist_int)$tTable

##                            Value  Std.Error    DF   t-value      p-value
## (Intercept)           4.03925726 0.25124353 13807 16.077060 1.231760e-57
## PileE                -0.41750662 0.36635316     4 -1.139629 3.180516e-01
## pile_moist_sc         0.02914240 0.01063408 13807  2.740474 6.142959e-03
## Chamber_Elevation_sc  0.25826697 0.03772754 13807  6.845581 7.938154e-12
## PileE:pile_moist_sc  -0.02916308 0.01653487 13807 -1.763732 7.779924e-02

# ---- 3) ML refits for valid model comparison ----
m_n2o_elev_ml <- lme(
  log_flux ~ Pile + Chamber_Elevation_sc,
  random = ~ 1 | Chamber_Corrected,
  correlation = corAR1(form = ~ t_index | Chamber_Corrected),
  weights = varIdent(form = ~ 1 | Pile),
  data = dat_n2o_moist_int,   # IMPORTANT: same data object
  method = "ML",
  na.action = na.omit
)

m_n2o_moist_int_ml <- lme(
  log_flux ~ Pile * pile_moist_sc + Chamber_Elevation_sc,
  random = ~ 1 | Chamber_Corrected,
  correlation = corAR1(form = ~ t_index | Chamber_Corrected),
  weights = varIdent(form = ~ 1 | Pile),
  data = dat_n2o_moist_int,   # same data object
  method = "ML",
  na.action = na.omit
)

anova(m_n2o_elev_ml, m_n2o_moist_int_ml)

##                    Model df      AIC      BIC    logLik   Test  L.Ratio p-value
## m_n2o_elev_ml          1  7 10628.95 10681.69 -5307.477                        
## m_n2o_moist_int_ml     2  9 10625.40 10693.20 -5303.700 1 vs 2 7.554127  0.0229

# ---- 4) Prediction grid + plot ----
moist_seq <- seq(
  min(dat_n2o_moist_int$pile_moist_sc, na.rm = TRUE),
  max(dat_n2o_moist_int$pile_moist_sc, na.rm = TRUE),
  length.out = 100
)

newdat <- expand.grid(
  pile_moist_sc = moist_seq,
  Pile = levels(dat_n2o_moist_int$Pile)
)

# hold elevation at mean (0 after scaling)
newdat$Chamber_Elevation_sc <- 0

# predict fixed effects only
newdat$pred_log  <- predict(m_n2o_moist_int, newdata = newdat, level = 0)
newdat$pred_flux <- exp(newdat$pred_log)

ggplot(dat_n2o_moist_int,
       aes(x = pile_moist_sc, y = exp(log_flux), colour = Pile)) +
  geom_point(alpha = 0.15, size = 1) +
  geom_line(data = newdat, aes(y = pred_flux), linewidth = 1.2) +
  theme_bw(base_size = 12) +
  theme(panel.grid.minor = element_blank()) +
  labs(
    x = "Pile moisture (scaled)",
    y = expression(n2o~flux~(back~transformed)),
    colour = "Pile"
  )

last of the adding moisture: so far moisture does improve model but there is not a pile specific moisture impact vs temperature where we see pile differences to the response.

dat_co2e_moist_int <- dat_co2e %>%
  mutate(
    Pile = factor(Pile, levels = c("C", "E")),
    
    # moisture raw (your chosen column)
    moist_raw = SWC_1.initial_value,
    
    # OPTIONAL: screen impossible moisture values (adjust if needed)
    moist_raw = ifelse(moist_raw < 0.01 | moist_raw > 0.9, NA, moist_raw),
    
    # mean-fill, then scale
    moist_filled = ifelse(
      is.na(moist_raw),
      mean(moist_raw, na.rm = TRUE),
      moist_raw
    ),
    pile_moist_sc = as.numeric(scale(moist_filled))
  )

# sanity checks
stopifnot(nrow(dat_co2e_moist_int) > 0)
stopifnot(all(!is.na(dat_co2e_moist_int$pile_moist_sc)))
stopifnot(!is.null(levels(dat_co2e_moist_int$Pile)))

# ---- 2) Fit moisture interaction model (REML) ----
m_co2e_moist_int <- lme(
  log_flux ~ Pile * pile_moist_sc + Chamber_Elevation_sc,
  random = ~ 1 | Chamber_Corrected,
  correlation = corAR1(form = ~ t_index | Chamber_Corrected),
  weights = varIdent(form = ~ 1 | Pile),
  data = dat_co2e_moist_int,
  method = "REML",
  na.action = na.omit
)

summary(m_co2e_moist_int)$tTable

##                            Value  Std.Error   DF    t-value       p-value
## (Intercept)           9.97973965 0.35869327 5549 27.8224891 1.350163e-159
## PileE                -0.50195236 0.53945451    4 -0.9304814  4.047934e-01
## pile_moist_sc         0.05330505 0.01264031 5549  4.2170692  2.514364e-05
## Chamber_Elevation_sc  0.21235228 0.03283622 5549  6.4670137  1.085142e-10
## PileE:pile_moist_sc  -0.08227815 0.03994925 5549 -2.0595668  3.948652e-02

# ---- 3) ML refits for valid model comparison ----
m_co2e_elev_ml <- lme(
  log_flux ~ Pile + Chamber_Elevation_sc,
  random = ~ 1 | Chamber_Corrected,
  correlation = corAR1(form = ~ t_index | Chamber_Corrected),
  weights = varIdent(form = ~ 1 | Pile),
  data = dat_co2e_moist_int,   # IMPORTANT: same data object
  method = "ML",
  na.action = na.omit
)

m_co2e_moist_int_ml <- lme(
  log_flux ~ Pile * pile_moist_sc + Chamber_Elevation_sc,
  random = ~ 1 | Chamber_Corrected,
  correlation = corAR1(form = ~ t_index | Chamber_Corrected),
  weights = varIdent(form = ~ 1 | Pile),
  data = dat_co2e_moist_int,   # same data object
  method = "ML",
  na.action = na.omit
)

anova(m_co2e_elev_ml, m_co2e_moist_int_ml)

##                     Model df      AIC      BIC    logLik   Test  L.Ratio
## m_co2e_elev_ml          1  7 4827.596 4873.957 -2406.798                
## m_co2e_moist_int_ml     2  9 4813.165 4872.772 -2397.583 1 vs 2 18.43094
##                     p-value
## m_co2e_elev_ml             
## m_co2e_moist_int_ml   1e-04

# ---- 4) Prediction grid + plot ----
moist_seq <- seq(
  min(dat_co2e_moist_int$pile_moist_sc, na.rm = TRUE),
  max(dat_co2e_moist_int$pile_moist_sc, na.rm = TRUE),
  length.out = 100
)

newdat <- expand.grid(
  pile_moist_sc = moist_seq,
  Pile = levels(dat_co2e_moist_int$Pile)
)

# hold elevation at mean (0 after scaling)
newdat$Chamber_Elevation_sc <- 0

# predict fixed effects only
newdat$pred_log  <- predict(m_co2e_moist_int, newdata = newdat, level = 0)
newdat$pred_flux <- exp(newdat$pred_log)

ggplot(dat_co2e_moist_int,
       aes(x = pile_moist_sc, y = exp(log_flux), colour = Pile)) +
  geom_point(alpha = 0.15, size = 1) +
  geom_line(data = newdat, aes(y = pred_flux), linewidth = 1.2) +
  theme_bw(base_size = 12) +
  theme(panel.grid.minor = element_blank()) +
  labs(
    x = "Pile moisture (scaled)",
    y = expression(Co2e~flux~(back~transformed)),
    colour = "Pile"
  )

We see the model that included moisture and temp is better (but that is both I think temperature is a bigger part of that so I would take this with a grain of salt.) When looking at co2e there is a pile interaction but that must be from co2 as we looked individually and did not see an interaction for many of the trace gases.

Looking at impacts post turning -

# ── Post-turn flag ────────────────────────────────────────────────────────────
dat_co2_pt <- dat_co2_int %>%
  mutate(
    PostTurn_0_1d = if_else(
      !is.na(DaysSinceLastTurn) &
        DaysSinceLastTurn > 0 &
        DaysSinceLastTurn <= 1,
      1L, 0L
    )
  )

count(dat_co2_pt, Pile, PostTurn_0_1d)

## # A tibble: 4 × 3
##   Pile  PostTurn_0_1d     n
##   <fct>         <int> <int>
## 1 C                 0  4658
## 2 C                 1   331
## 3 E                 0  4019
## 4 E                 1   114

# ── Remove invalid flux values ────────────────────────────────────────────────
dat_co2_pt_use <- dat_co2_pt %>%
  filter(!is.na(FCO2_DRY.LIN), is.finite(FCO2_DRY.LIN))

count(dat_co2_pt_use, Pile, PostTurn_0_1d)

## # A tibble: 4 × 3
##   Pile  PostTurn_0_1d     n
##   <fct>         <int> <int>
## 1 C                 0  4658
## 2 C                 1   331
## 3 E                 0  4019
## 4 E                 1   114

# ── Descriptive summary ───────────────────────────────────────────────────────
summary_co2 <- dat_co2_pt_use %>%
  group_by(Pile, PostTurn_0_1d) %>%
  summarise(
    n      = n(),
    mean   = mean(FCO2_DRY.LIN),
    sd     = sd(FCO2_DRY.LIN),
    se     = sd / sqrt(n),
    median = median(FCO2_DRY.LIN),
    .groups = "drop"
  )
summary_co2

## # A tibble: 4 × 7
##   Pile  PostTurn_0_1d     n  mean    sd    se median
##   <fct>         <int> <int> <dbl> <dbl> <dbl>  <dbl>
## 1 C                 0  4658  190.  179.  2.63  134. 
## 2 C                 1   331  220.  408. 22.4    95.5
## 3 E                 0  4019  194.  196.  3.09  146. 
## 4 E                 1   114  154.  171. 16.0   126.

# ── Effect size table ─────────────────────────────────────────────────────────
co2_effect <- summary_co2 %>%
  select(Pile, PostTurn_0_1d, mean, median) %>%
  tidyr::pivot_wider(
    names_from  = PostTurn_0_1d,
    values_from = c(mean, median),
    names_prefix = "PostTurn="
  ) %>%
  mutate(
    mean_diff    = `mean_PostTurn=1`   - `mean_PostTurn=0`,
    mean_ratio   = `mean_PostTurn=1`   / `mean_PostTurn=0`,
    median_diff  = `median_PostTurn=1` - `median_PostTurn=0`,
    median_ratio = `median_PostTurn=1` / `median_PostTurn=0`
  )
co2_effect

## # A tibble: 2 × 9
##   Pile  `mean_PostTurn=0` `mean_PostTurn=1` `median_PostTurn=0`
##   <fct>             <dbl>             <dbl>               <dbl>
## 1 C                  190.              220.                134.
## 2 E                  194.              154.                146.
## # ℹ 5 more variables: `median_PostTurn=1` <dbl>, mean_diff <dbl>,
## #   mean_ratio <dbl>, median_diff <dbl>, median_ratio <dbl>

# ── Boxplot ───────────────────────────────────────────────────────────────────
ggplot(dat_co2_pt_use,
       aes(x = factor(PostTurn_0_1d),
           y = FCO2_DRY.LIN,
           fill = Pile)) +
  geom_boxplot(outlier.alpha = 0.3) +
  scale_x_discrete(labels = c("0" = "Not post-turn",
                               "1" = "0–1 d post-turn")) +
  labs(x = "", y = "CO₂ flux") +
  theme_bw()

# ── Models (REML) — now include temperature and elevation as covariates ───────
# Interaction model: does the turning effect differ between piles?
m_co2_postturn <- lme(
  fixed = log(FCO2_DRY.LIN) ~ Pile * PostTurn_0_1d +
                               pile_temp_sc +
                               Chamber_Elevation_sc,
  random      = ~ 1 | Chamber_Corrected,
  correlation = corAR1(form = ~ t_index | Chamber_Corrected),
  weights     = varIdent(form = ~ 1 | Pile),
  data        = dat_co2_pt_use,
  method      = "REML",
  na.action   = na.omit
)

# Additive model: turning effect same across piles
m_co2_no_int <- lme(
  fixed = log(FCO2_DRY.LIN) ~ Pile + PostTurn_0_1d +
                               pile_temp_sc +
                               Chamber_Elevation_sc,
  random      = ~ 1 | Chamber_Corrected,
  correlation = corAR1(form = ~ t_index | Chamber_Corrected),
  weights     = varIdent(form = ~ 1 | Pile),
  data        = dat_co2_pt_use,
  method      = "REML",
  na.action   = na.omit
)

# ── Model comparison (must use ML for LRT) ────────────────────────────────────
m_co2_postturn_ml <- update(m_co2_postturn, method = "ML")
m_co2_no_int_ml   <- update(m_co2_no_int,   method = "ML")

anova(m_co2_no_int_ml, m_co2_postturn_ml)

##                   Model df      AIC      BIC    logLik   Test  L.Ratio p-value
## m_co2_no_int_ml       1  9 4619.119 4683.185 -2300.560                        
## m_co2_postturn_ml     2 10 4595.156 4666.341 -2287.578 1 vs 2 25.96302  <.0001

# ── Final summary (REML estimates) ───────────────────────────────────────────
summary(m_co2_postturn)$tTable

##                            Value  Std.Error   DF     t-value      p-value
## (Intercept)           4.87392355 0.28074325 9112 17.36078648 1.903702e-66
## PileE                -0.01099229 0.39859501    4 -0.02757758 9.793201e-01
## PostTurn_0_1d         0.01075701 0.04587157 9112  0.23450272 8.146000e-01
## pile_temp_sc          0.36841119 0.02166840 9112 17.00223109 7.582838e-64
## Chamber_Elevation_sc  0.09407421 0.02204548 9112  4.26727872 1.998735e-05
## PileE:PostTurn_0_1d   0.47823877 0.09372350 9112  5.10265603 3.416751e-07

So if we look at day after turning between the pile there is a huge difference that is statistically sig, if you change this to 3 days there is too much noise but the mean is still higher. This is something that can be used to say that there are differences in how the piles respond to turning based on the treatment. Not sure what to say about this.

The same post turn analysis but for ch4

# ── Post-turn flag ────────────────────────────────────────────────────────────
dat_ch4_pt <- dat_ch4_int %>%
  mutate(
    PostTurn_0_1d = if_else(
      !is.na(DaysSinceLastTurn) &
        DaysSinceLastTurn > 0 &
        DaysSinceLastTurn <= 1,
      1L, 0L
    )
  )

count(dat_ch4_pt, Pile, PostTurn_0_1d)

## # A tibble: 4 × 3
##   Pile  PostTurn_0_1d     n
##   <fct>         <int> <int>
## 1 C                 0  4481
## 2 C                 1   398
## 3 E                 0  3998
## 4 E                 1   128

# ── Remove invalid flux values ────────────────────────────────────────────────
dat_ch4_pt_use <- dat_ch4_pt %>%
  filter(!is.na(FCH4_DRY.LIN), is.finite(FCH4_DRY.LIN))

count(dat_ch4_pt_use, Pile, PostTurn_0_1d)

## # A tibble: 4 × 3
##   Pile  PostTurn_0_1d     n
##   <fct>         <int> <int>
## 1 C                 0  4481
## 2 C                 1   398
## 3 E                 0  3998
## 4 E                 1   128

# ── Descriptive summary ───────────────────────────────────────────────────────
summary_ch4 <- dat_ch4_pt_use %>%
  group_by(Pile, PostTurn_0_1d) %>%
  summarise(
    n      = n(),
    mean   = mean(FCH4_DRY.LIN),
    sd     = sd(FCH4_DRY.LIN),
    se     = sd / sqrt(n),
    median = median(FCH4_DRY.LIN),
    .groups = "drop"
  )
summary_ch4

## # A tibble: 4 × 7
##   Pile  PostTurn_0_1d     n  mean     sd    se median
##   <fct>         <int> <int> <dbl>  <dbl> <dbl>  <dbl>
## 1 C                 0  4481 5591.  9321. 139.   1260.
## 2 C                 1   398 6488. 14168. 710.   1377.
## 3 E                 0  3998 3661.  6009.  95.0  1469.
## 4 E                 1   128 2174.  1831. 162.   1616.

# ── Effect size table ─────────────────────────────────────────────────────────
ch4_effect <- summary_ch4 %>%
  select(Pile, PostTurn_0_1d, mean, median) %>%
  tidyr::pivot_wider(
    names_from   = PostTurn_0_1d,
    values_from  = c(mean, median),
    names_prefix = "PostTurn="
  ) %>%
  mutate(
    mean_diff    = `mean_PostTurn=1`   - `mean_PostTurn=0`,
    mean_ratio   = `mean_PostTurn=1`   / `mean_PostTurn=0`,
    median_diff  = `median_PostTurn=1` - `median_PostTurn=0`,
    median_ratio = `median_PostTurn=1` / `median_PostTurn=0`
  )
ch4_effect

## # A tibble: 2 × 9
##   Pile  `mean_PostTurn=0` `mean_PostTurn=1` `median_PostTurn=0`
##   <fct>             <dbl>             <dbl>               <dbl>
## 1 C                 5591.             6488.               1260.
## 2 E                 3661.             2174.               1469.
## # ℹ 5 more variables: `median_PostTurn=1` <dbl>, mean_diff <dbl>,
## #   mean_ratio <dbl>, median_diff <dbl>, median_ratio <dbl>

# ── Boxplot ───────────────────────────────────────────────────────────────────
ggplot(dat_ch4_pt_use,
       aes(x = factor(PostTurn_0_1d),
           y = FCH4_DRY.LIN,
           fill = Pile)) +
  geom_boxplot(outlier.alpha = 0.3) +
  scale_x_discrete(labels = c("0" = "Not post-turn",
                               "1" = "0–1 d post-turn")) +
  labs(x = "", y = "CH₄ flux") +        # fixed: was labelled CO₂
  theme_bw()

# ── Models (REML) — include temperature and elevation as covariates ───────────
m_ch4_postturn <- lme(
  fixed = log(FCH4_DRY.LIN) ~ Pile * PostTurn_0_1d +
                               pile_temp_sc +
                               Chamber_Elevation_sc,
  random      = ~ 1 | Chamber_Corrected,
  correlation = corAR1(form = ~ t_index | Chamber_Corrected),
  weights     = varIdent(form = ~ 1 | Pile),
  data        = dat_ch4_pt_use,
  method      = "REML",
  na.action   = na.omit
)

m_ch4_no_int <- lme(
  fixed = log(FCH4_DRY.LIN) ~ Pile + PostTurn_0_1d +
                               pile_temp_sc +
                               Chamber_Elevation_sc,
  random      = ~ 1 | Chamber_Corrected,
  correlation = corAR1(form = ~ t_index | Chamber_Corrected),
  weights     = varIdent(form = ~ 1 | Pile),
  data        = dat_ch4_pt_use,
  method      = "REML",
  na.action   = na.omit
)

# ── Model comparison (ML required for LRT) ────────────────────────────────────
m_ch4_postturn_ml <- update(m_ch4_postturn, method = "ML")
m_ch4_no_int_ml   <- update(m_ch4_no_int,   method = "ML")

anova(m_ch4_no_int_ml, m_ch4_postturn_ml)

##                   Model df      AIC      BIC    logLik   Test  L.Ratio p-value
## m_ch4_no_int_ml       1  9 9998.492 10062.44 -4990.246                        
## m_ch4_postturn_ml     2 10 9991.386 10062.44 -4985.693 1 vs 2 9.106052  0.0025

# ── Final summary (REML estimates) ───────────────────────────────────────────
summary(m_ch4_postturn)$tTable

##                            Value  Std.Error   DF    t-value      p-value
## (Intercept)           6.99468226 0.81693312 8995  8.5621235 1.290747e-17
## PileE                -0.61117942 1.17816867    4 -0.5187538 6.313099e-01
## PostTurn_0_1d        -0.12770462 0.05355946 8995 -2.3843523 1.712982e-02
## pile_temp_sc          0.07310311 0.03283966 8995  2.2260615 2.603474e-02
## Chamber_Elevation_sc  0.26270661 0.04030089 8995  6.5186300 7.476646e-11
## PileE:PostTurn_0_1d   0.43132846 0.14287812 8995  3.0188560 2.544423e-03

this is less clear than co2 but still stat sig - so ch4 emission post turning differ by pile and thus we should include interaction term.

n2o 1 day post turn analysis: does it differ between piles ?

# ── Post-turn flag ────────────────────────────────────────────────────────────
dat_n2o_pt <- dat_n2o_int %>%
  mutate(
    PostTurn_0_1d = if_else(
      !is.na(DaysSinceLastTurn) &
        DaysSinceLastTurn > 0 &
        DaysSinceLastTurn <= 1,
      1L, 0L
    )
  )

count(dat_n2o_pt, Pile, PostTurn_0_1d)

## # A tibble: 4 × 3
##   Pile  PostTurn_0_1d     n
##   <fct>         <int> <int>
## 1 C                 0  6397
## 2 C                 1   509
## 3 E                 0  6641
## 4 E                 1   269

# ── Remove invalid flux values ────────────────────────────────────────────────
dat_n2o_pt_use <- dat_n2o_pt %>%
  filter(!is.na(FN2O_DRY.LIN), is.finite(FN2O_DRY.LIN))

count(dat_n2o_pt_use, Pile, PostTurn_0_1d)

## # A tibble: 4 × 3
##   Pile  PostTurn_0_1d     n
##   <fct>         <int> <int>
## 1 C                 0  6397
## 2 C                 1   509
## 3 E                 0  6641
## 4 E                 1   269

# ── Descriptive summary ───────────────────────────────────────────────────────
summary_n2o <- dat_n2o_pt_use %>%
  group_by(Pile, PostTurn_0_1d) %>%
  summarise(
    n      = n(),
    mean   = mean(FN2O_DRY.LIN),
    sd     = sd(FN2O_DRY.LIN),
    se     = sd / sqrt(n),
    median = median(FN2O_DRY.LIN),
    .groups = "drop"
  )
summary_n2o

## # A tibble: 4 × 7
##   Pile  PostTurn_0_1d     n  mean    sd    se median
##   <fct>         <int> <int> <dbl> <dbl> <dbl>  <dbl>
## 1 C                 0  6397 154.   298.  3.73   61.5
## 2 C                 1   509 200.   247. 10.9   111. 
## 3 E                 0  6641  96.1  247.  3.03   34.7
## 4 E                 1   269 154.   195. 11.9    89.9

# ── Effect size table ─────────────────────────────────────────────────────────
n2o_effect <- summary_n2o %>%
  select(Pile, PostTurn_0_1d, mean, median) %>%
  tidyr::pivot_wider(
    names_from   = PostTurn_0_1d,
    values_from  = c(mean, median),
    names_prefix = "PostTurn="
  ) %>%
  mutate(
    mean_diff    = `mean_PostTurn=1`   - `mean_PostTurn=0`,
    mean_ratio   = `mean_PostTurn=1`   / `mean_PostTurn=0`,
    median_diff  = `median_PostTurn=1` - `median_PostTurn=0`,
    median_ratio = `median_PostTurn=1` / `median_PostTurn=0`
  )
n2o_effect

## # A tibble: 2 × 9
##   Pile  `mean_PostTurn=0` `mean_PostTurn=1` `median_PostTurn=0`
##   <fct>             <dbl>             <dbl>               <dbl>
## 1 C                 154.               200.                61.5
## 2 E                  96.1              154.                34.7
## # ℹ 5 more variables: `median_PostTurn=1` <dbl>, mean_diff <dbl>,
## #   mean_ratio <dbl>, median_diff <dbl>, median_ratio <dbl>

# ── Boxplot ───────────────────────────────────────────────────────────────────
ggplot(dat_n2o_pt_use,
       aes(x = factor(PostTurn_0_1d),
           y = FN2O_DRY.LIN,
           fill = Pile)) +
  geom_boxplot(outlier.alpha = 0.3) +
  scale_x_discrete(labels = c("0" = "Not post-turn",
                               "1" = "0–1 d post-turn")) +
  labs(x = "", y = "N₂O flux") +        # fixed: was labelled CO₂
  theme_bw()

# ── Models (REML) — include temperature and elevation as covariates ───────────
m_n2o_postturn <- lme(
  fixed = log(FN2O_DRY.LIN) ~ Pile * PostTurn_0_1d +
                               pile_temp_sc +
                               Chamber_Elevation_sc,
  random      = ~ 1 | Chamber_Corrected,
  correlation = corAR1(form = ~ t_index | Chamber_Corrected),
  weights     = varIdent(form = ~ 1 | Pile),
  data        = dat_n2o_pt_use,
  method      = "REML",
  na.action   = na.omit
)

m_n2o_no_int <- lme(
  fixed = log(FN2O_DRY.LIN) ~ Pile + PostTurn_0_1d +
                               pile_temp_sc +
                               Chamber_Elevation_sc,
  random      = ~ 1 | Chamber_Corrected,
  correlation = corAR1(form = ~ t_index | Chamber_Corrected),
  weights     = varIdent(form = ~ 1 | Pile),
  data        = dat_n2o_pt_use,
  method      = "REML",
  na.action   = na.omit
)

# ── Model comparison (ML required for LRT) ────────────────────────────────────
m_n2o_postturn_ml <- update(m_n2o_postturn, method = "ML")
m_n2o_no_int_ml   <- update(m_n2o_no_int,   method = "ML")

anova(m_n2o_no_int_ml, m_n2o_postturn_ml)

##                   Model df      AIC      BIC   logLik   Test      L.Ratio
## m_n2o_no_int_ml       1  9 10493.78 10561.58 -5237.89                    
## m_n2o_postturn_ml     2 10 10495.78 10571.11 -5237.89 1 vs 2 2.604356e-07
##                   p-value
## m_n2o_no_int_ml          
## m_n2o_postturn_ml  0.9996

# ── Final summary (REML estimates) ───────────────────────────────────────────
summary(m_n2o_postturn)$tTable

##                              Value  Std.Error    DF      t-value      p-value
## (Intercept)           3.9918411914 0.23661130 13806 16.870881466 3.164463e-63
## PileE                -0.4048616493 0.34676474     4 -1.167539854 3.078416e-01
## PostTurn_0_1d         0.5040294456 0.04870305 13806 10.349032067 5.217810e-25
## pile_temp_sc          0.0098176008 0.02558450 13806  0.383732347 7.011827e-01
## Chamber_Elevation_sc  0.1981037776 0.03789944 13806  5.227090447 1.747089e-07
## PileE:PostTurn_0_1d   0.0002533443 0.10052066 13806  0.002520321 9.979891e-01

Hmmmmm this one is weird so I’m not sure how to interpret it, but there is a stat difference p value of .9 at 1 day and .09 at 3 days and then at 7 days we see 0.00 something, this is the opposite of ch4 and co2 which the effect decreases with time.. So it seems n2o does not change immediately after turning, there is a delay vs co2 and ch4 where there is an immediate change, this might be mechanistic as it might be more microbe mediated vs built up release and aeration related ?

So there is a period after turning where n2o is different that is from like 2-3 days to about a week and then they are more similar, this pulse is very interesting.

Looking at ratios of gases:

While the means are not significantly different the ratio of trace gases might be, we would expect the exp pile to have more a higher ch4/co2 ratio compared to the more aerated c pile.

``` r # ── Build ratio dataset ─────────────────────────────────────────────────────── dat_ratio <- Flux_All_Gases_QC %>% mutate( CO2_umol = FCO2_DRY.LIN, CH4_umol = FCH4_DRY.LIN * 0.001 # nmol → µmol ) %>% filter(CO2_umol > 0, CH4_umol > 0) %>% # log undefined at <= 0 mutate( ratio_CO2_CH4 = CO2_umol / CH4_umol, log_ratio = log(ratio_CO2_CH4) ) %>% arrange(Chamber_Corrected, DOY) %>% group_by(Chamber_Corrected) %>% mutate(t_index = row_number()) %>% ungroup()

# ── Sanity check ───────────────────────────────────────────────────────────── dat_ratio %>% summarise( n_obs = n(), n_chambers = n_distinct(Chamber_Corrected), min_ratio = min(ratio_CO2_CH4), max_ratio = max(ratio_CO2_CH4) ) ```

## # A tibble: 1 × 4 ##   n_obs n_chambers min_ratio max_ratio ##   <int>      <int>     <dbl>     <dbl> ## 1  5558          6      2.23   115095.

``` r # ── Model ───────────────────────────────────────────────────────────────────── m_ratio <- lme( log_ratio ~ Pile, random = ~ 1 | Chamber_Corrected, correlation = corAR1(form = ~ t_index | Chamber_Corrected), weights = varIdent(form = ~ 1 | Pile), data = dat_ratio, method = “REML” )

summary(m_ratio)$tTable ```

##                 Value Std.Error   DF   t-value      p-value ## (Intercept) 4.7282131 0.6300319 5552 7.5047198 7.129997e-14 ## PileE       0.8996436 0.9315278    4 0.9657721 3.888485e-01

``` r # ── Pile means and contrast on response scale ───────────────────────────────── emm_link <- emmeans(m_ratio, ~ Pile) ctr_link_ci <- confint(pairs(emm_link))

ratio_E_over_C <- as.data.frame(ctr_link_ci) %>% mutate( ratio = exp(estimate), ratio_low = exp(lower.CL), ratio_high = exp(upper.CL) ) ratio_E_over_C ```

##   contrast   estimate        SE df lower.CL upper.CL     ratio  ratio_low ## 1    C - E -0.8996436 0.9315278  4 -3.48598 1.686692 0.4067146 0.03062375 ##   ratio_high ## 1   5.401585

r # ── Pile means on response scale ────────────────────────────────────────────── confint(emmeans(m_ratio, ~ Pile, type = "response"))

##  Pile emmean    SE df lower.CL upper.CL ##  C      4.73 0.630  5     3.11     6.35 ##  E      5.63 0.686  4     3.72     7.53 ## ## Degrees-of-freedom method: containment ## Confidence level used: 0.95

r # ── Diagnostics ─────────────────────────────────────────────────────────────── plot(m_ratio)

r qqnorm(resid(m_ratio)); qqline(resid(m_ratio))

We do not have to use specific figures we can edit this, what Zaie has done on the ratios is similiar. There is not a statistical difference in the ratio of co2 to ch4 throughout the whole experimental cycle as there just are not enough df and there is too much variance, what we do see if a large effect size for the whole experiment.

The difference by pile for the entire experimental cycle is not significant by pile: the CIs are large: still - mean for Control is higher lower which means the control emitted more ch4 per unit co2 or Pile E numerically shows a higher ratio (relatively less CH₄ per unit CO₂).

More can be done with ratios - need to work with Zaie

Looking at response to turning: slope of max temp post turning declines more quickly in the e vs the c: why is that ?

library(broom)
library(gt)
# ============================================================
# POST-TURN TEMPERATURE ANALYSIS
# Peak temperature after each turning event
# ============================================================

# ── CONTROL PILE (C1, C2, C3) — all turning events ───────────────────────────

# Step 1 - Build post/pre turn window
all_turns_sorted <- sort(c(turns_both_red, turns_control_black))

post_turn_temp <- map_dfr(all_turns_sorted, function(turn) {
  Cleaned_Data %>%
    filter(Chamber_Corrected %in% c("C1", "C2", "C3"),
           DOY.initial_value >= turn - 3,
           DOY.initial_value <= turn + 12) %>%
    mutate(
      turn_event     = turn,
      days_post_turn = DOY.initial_value - turn
    )
})

# Step 2 - Daily averages (min 5 obs, exclude turning day)
daily_avg_turns <- post_turn_temp %>%
  mutate(day_bin = floor(days_post_turn)) %>%
  filter(day_bin != 0) %>%
  group_by(turn_event, Chamber_Corrected, day_bin) %>%
  summarise(
    daily_mean = mean(pile_temp, na.rm = TRUE),
    n_obs      = n(),
    .groups    = "drop"
  ) %>%
  filter(n_obs >= 5)

# Step 3 - Qualify combos (days 1-5 must all be present)
qualified_combos <- daily_avg_turns %>%
  filter(day_bin >= 1, day_bin <= 5) %>%
  group_by(turn_event, Chamber_Corrected) %>%
  summarise(days_covered = n_distinct(day_bin), .groups = "drop") %>%
  filter(days_covered >= 5)

# Step 4 - Pre-turn reference
pre_turn_ref <- daily_avg_turns %>%
  filter(day_bin < 0) %>%
  group_by(turn_event, Chamber_Corrected) %>%
  summarise(
    pre_turn_day  = max(day_bin),
    pre_turn_temp = daily_mean[day_bin == max(day_bin)],
    .groups = "drop"
  )

# Step 5 - Peak metrics (control)
peak_metrics <- daily_avg_turns %>%
  inner_join(qualified_combos %>% select(turn_event, Chamber_Corrected),
             by = c("turn_event", "Chamber_Corrected")) %>%
  filter(day_bin >= 1) %>%
  group_by(turn_event, Chamber_Corrected) %>%
  summarise(
    peak_temp = max(daily_mean, na.rm = TRUE),
    peak_day  = day_bin[which.max(daily_mean)],
    temp_day1 = ifelse(any(day_bin == 1), daily_mean[day_bin == 1], NA_real_),
    .groups   = "drop"
  ) %>%
  left_join(pre_turn_ref, by = c("turn_event", "Chamber_Corrected")) %>%
  mutate(
    spike_magnitude = ifelse(!is.na(pre_turn_temp) & peak_day > 1,
                             peak_temp - pre_turn_temp, NA_real_),
    time_to_peak    = ifelse(!is.na(temp_day1) & peak_day > 1 &
                               peak_temp > temp_day1, peak_day, NA_real_)
  ) %>%
  left_join(
    tibble(turn_event  = all_turns_sorted,
           turn_number = seq_along(all_turns_sorted)),
    by = "turn_event"
  )

## Warning: There was 1 warning in `summarise()`.
## ℹ In argument: `peak_temp = max(daily_mean, na.rm = TRUE)`.
## ℹ In group 7: `turn_event = 191` `Chamber_Corrected = "C2"`.
## Caused by warning in `max()`:
## ! no non-missing arguments to max; returning -Inf

## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
##   always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# ── EXPERIMENTAL PILE (C4, C5, C6) — both-turned events only ─────────────────

exp_turns_sorted <- sort(turns_both_red)

post_turn_temp_exp <- map_dfr(exp_turns_sorted, function(turn) {
  Cleaned_Data %>%
    filter(Chamber_Corrected %in% c("C4", "C5", "C6"),
           DOY.initial_value >= turn - 3,
           DOY.initial_value <= turn + 12) %>%
    mutate(
      turn_event     = turn,
      days_post_turn = DOY.initial_value - turn
    )
})

daily_avg_turns_exp <- post_turn_temp_exp %>%
  mutate(day_bin = floor(days_post_turn)) %>%
  filter(day_bin != 0) %>%
  group_by(turn_event, Chamber_Corrected, day_bin) %>%
  summarise(
    daily_mean = mean(pile_temp, na.rm = TRUE),
    n_obs      = n(),
    .groups    = "drop"
  ) %>%
  filter(n_obs >= 5)

qualified_combos_exp <- daily_avg_turns_exp %>%
  filter(day_bin >= 1, day_bin <= 5) %>%
  group_by(turn_event, Chamber_Corrected) %>%
  summarise(days_covered = n_distinct(day_bin), .groups = "drop") %>%
  filter(days_covered >= 5)

pre_turn_ref_exp <- daily_avg_turns_exp %>%
  filter(day_bin < 0) %>%
  group_by(turn_event, Chamber_Corrected) %>%
  summarise(
    pre_turn_day  = max(day_bin),
    pre_turn_temp = daily_mean[day_bin == max(day_bin)],
    .groups = "drop"
  )

peak_metrics_exp <- daily_avg_turns_exp %>%
  inner_join(qualified_combos_exp %>% select(turn_event, Chamber_Corrected),
             by = c("turn_event", "Chamber_Corrected")) %>%
  filter(day_bin >= 1) %>%
  group_by(turn_event, Chamber_Corrected) %>%
  summarise(
    peak_temp = max(daily_mean, na.rm = TRUE),
    peak_day  = day_bin[which.max(daily_mean)],
    temp_day1 = ifelse(any(day_bin == 1), daily_mean[day_bin == 1], NA_real_),
    .groups   = "drop"
  ) %>%
  left_join(pre_turn_ref_exp, by = c("turn_event", "Chamber_Corrected")) %>%
  mutate(
    spike_magnitude = ifelse(!is.na(pre_turn_temp) & peak_day > 1,
                             peak_temp - pre_turn_temp, NA_real_),
    time_to_peak    = ifelse(!is.na(temp_day1) & peak_day > 1 &
                               peak_temp > temp_day1, peak_day, NA_real_)
  ) %>%
  left_join(
    tibble(turn_event  = exp_turns_sorted,
           turn_number = seq_along(exp_turns_sorted)),
    by = "turn_event"
  )

## Warning: There were 2 warnings in `summarise()`.
## The first warning was:
## ℹ In argument: `peak_temp = max(daily_mean, na.rm = TRUE)`.
## ℹ In group 8: `turn_event = 234` `Chamber_Corrected = "C5"`.
## Caused by warning in `max()`:
## ! no non-missing arguments to max; returning -Inf
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.

## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
##   always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# ── FIGURES — Peak temperature only ──────────────────────────────────────────

p_peak <- ggplot(peak_metrics %>% filter(!is.na(peak_temp)),
                 aes(x = turn_number, y = peak_temp)) +
  # raw chamber points — opaque to show variability
  geom_point(aes(colour = Chamber_Corrected),
             size = 2.5, alpha = 0.35) +
  # chamber lines — faint to connect dots per chamber
  geom_line(aes(colour = Chamber_Corrected, group = Chamber_Corrected),
            linewidth = 0.5, alpha = 0.25) +
  # overall trend line across all chambers
  geom_smooth(method = "lm", se = TRUE,
              colour = "black", fill = "grey30",
              linewidth = 0.9, linetype = "dashed", alpha = 0.15) +
  scale_colour_manual(values = chamber_cols) +
  scale_x_continuous(breaks = 1:11) +
  theme_bw(base_size = 11) +
  theme(
    legend.position  = "top",
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(colour = "grey92", linewidth = 0.3)
  ) +
  labs(x      = "Turning event number",
       y      = "Peak temperature (°C)",
       colour = "Chamber",
       title  = "Control pile — peak post-turn temperature")

p_peak_exp <- ggplot(peak_metrics_exp %>% filter(!is.na(peak_temp)),
                     aes(x = turn_number, y = peak_temp)) +
  geom_point(aes(colour = Chamber_Corrected),
             size = 2.5, alpha = 0.35) +
  geom_line(aes(colour = Chamber_Corrected, group = Chamber_Corrected),
            linewidth = 0.5, alpha = 0.25) +
  geom_smooth(method = "lm", se = TRUE,
              colour = "black", fill = "grey30",
              linewidth = 0.9, linetype = "dashed", alpha = 0.15) +
  scale_colour_manual(values = chamber_cols) +
  scale_x_continuous(breaks = 1:6) +
  theme_bw(base_size = 11) +
  theme(
    legend.position  = "none",
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(colour = "grey92", linewidth = 0.3)
  ) +
  labs(x      = "Turning event number",
       y      = "Peak temperature (°C)",
       colour = "Chamber",
       title  = "Experimental pile — peak post-turn temperature")

p_peak / p_peak_exp

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

# ── LINEAR MODELS — peak temperature only ────────────────────────────────────
lm_peak     <- lm(peak_temp ~ turn_number,
                  data = peak_metrics %>% filter(!is.na(peak_temp)))

lm_peak_exp <- lm(peak_temp ~ turn_number,
                  data = peak_metrics_exp %>% filter(!is.na(peak_temp)))

# ── SUMMARY TABLE ─────────────────────────────────────────────────────────────
library(broom)

slopes <- bind_rows(
  tidy(lm_peak) %>%
    filter(term == "turn_number") %>%
    mutate(pile = "Control"),
  tidy(lm_peak_exp) %>%
    filter(term == "turn_number") %>%
    mutate(pile = "Experimental")
) %>%
  select(pile, estimate, std.error)

model_stats <- bind_rows(
  glance(lm_peak)     %>% mutate(pile = "Control"),
  glance(lm_peak_exp) %>% mutate(pile = "Experimental")
) %>%
  select(pile, r.squared, statistic, p.value, df, df.residual)

left_join(slopes, model_stats, by = "pile") %>%
  mutate(across(c(estimate, std.error), ~ round(.x, 2)),
         across(c(r.squared, statistic), ~ round(.x, 3)),
         p.value = round(p.value, 4)) %>%
  gt() %>%
  cols_label(
    pile        = "Pile",
    estimate    = "Slope (°C/turn)",
    std.error   = "SE",
    r.squared   = "R²",
    statistic   = "F",
    p.value     = "p-value",
    df          = "df",
    df.residual = "df residual"
  ) %>%
  tab_header(
    title    = "Linear Model Results — Peak Post-turn Temperature",
    subtitle = "Change in peak temperature per successive turning event"
  ) %>%
  tab_style(style     = cell_text(weight = "bold"),
            locations = cells_body(rows = p.value < 0.05)) %>%
  tab_style(style     = cell_text(weight = "bold"),
            locations = cells_column_labels()) %>%
  fmt_number(columns = p.value, decimals = 4) %>%
  sub_missing(missing_text = "—") %>%
  opt_table_font(font = "Times New Roman")

Pile	Slope (°C/turn)	SE	R²	F	p-value	df	df residual
Linear Model Results — Peak Post-turn Temperature
Change in peak temperature per successive turning event
Control	-1.86	0.48	0.459	15.241	0.0010	1	18
Experimental	-8.88	1.36	0.842	42.662	0.0002	1	8

Turning events looking at temp similar analysis to the above needs to be worked on

library(tidyverse)

# All turning events for control chambers
all_turns_sorted <- sort(c(turns_both_red, turns_control_black))

# Filter to control chambers and ALL turning events
control_turns <- post_turn_temp %>%
  filter(Chamber_Corrected %in% c("C1", "C2", "C3"),
         turn_event %in% all_turns_sorted)

# Check data availability per day per chamber per turn
data_availability <- control_turns %>%
  mutate(day_bin = floor(days_post_turn)) %>%
  filter(day_bin >= 1, day_bin <= 5) %>%
  group_by(turn_event, Chamber_Corrected, day_bin) %>%
  summarise(n_obs = n(), .groups = "drop")

# Flag chamber-turn combinations meeting minimum 5 obs per day
qualified_combos <- data_availability %>%
  group_by(turn_event, Chamber_Corrected) %>%
  summarise(
    days_with_enough = sum(n_obs >= 5),
    all_days_covered = days_with_enough >= 5,
    .groups = "drop"
  ) %>%
  filter(all_days_covered)

qualified_combos

## # A tibble: 21 × 4
##    turn_event Chamber_Corrected days_with_enough all_days_covered
##         <dbl> <chr>                        <int> <lgl>           
##  1        159 C1                               5 TRUE            
##  2        159 C2                               5 TRUE            
##  3        159 C3                               5 TRUE            
##  4        173 C2                               5 TRUE            
##  5        173 C3                               5 TRUE            
##  6        191 C1                               5 TRUE            
##  7        191 C2                               5 TRUE            
##  8        208 C1                               5 TRUE            
##  9        208 C2                               5 TRUE            
## 10        208 C3                               5 TRUE            
## # ℹ 11 more rows

# Fit cubic polynomial for each qualifying chamber-turn combination
poly_metrics <- control_turns %>%
  inner_join(qualified_combos %>%
               select(turn_event, Chamber_Corrected),
             by = c("turn_event", "Chamber_Corrected")) %>%
  group_by(turn_event, Chamber_Corrected) %>%
  filter(sum(!is.na(pile_temp)) >= 10) %>%  # minimum 10 observations to fit poly
  group_modify(~ {
    df <- .x
    
    # Fit cubic polynomial
    fit <- tryCatch(
      lm(pile_temp ~ poly(days_post_turn, 3, raw = TRUE), data = df),
      error = function(e) NULL
    )
    
    if (is.null(fit)) {
      return(tibble(
        slope_at_turn  = NA,
        time_to_peak   = 0,
        peak_temp      = NA,
        pre_turn_mean  = NA
      ))
    }
    
    # Extract coefficients: a + bx + cx^2 + dx^3
    coefs <- coef(fit)
    a <- coefs[1]
    b <- coefs[2]
    c <- coefs[3]
    d <- coefs[4]
    
    # Slope (first derivative) at day 0: b + 2cx + 3dx^2 at x=0
    slope_at_turn <- b
    
    # Find turning points — solve derivative = 0
    # b + 2cx + 3dx^2 = 0
    # Use numerical approach across post-turn window
    x_seq <- seq(0, 12, by = 0.01)
    y_seq <- a + b*x_seq + c*x_seq^2 + d*x_seq^3
    dy_seq <- b + 2*c*x_seq + 3*d*x_seq^2
    
    # Find where derivative crosses zero (peak) after turn
    sign_changes <- which(diff(sign(dy_seq)) < 0)  # negative = peak not trough
    
    if (length(sign_changes) == 0 || max(y_seq) <= y_seq[1]) {
      # No peak found or polynomial declining throughout — no increase
      time_to_peak <- 0
      peak_temp    <- y_seq[1]
    } else {
      peak_idx     <- sign_changes[1]
      time_to_peak <- x_seq[peak_idx]
      peak_temp    <- y_seq[peak_idx]
    }
    
    # Pre-turn mean
    pre_turn_mean <- mean(df$pile_temp[df$days_post_turn < 0 &
                                         df$days_post_turn >= -3],
                          na.rm = TRUE)
    
    tibble(
      slope_at_turn = slope_at_turn,
      time_to_peak  = time_to_peak,
      peak_temp     = peak_temp,
      pre_turn_mean = pre_turn_mean
    )
  }) %>%
  ungroup()

poly_metrics

## # A tibble: 21 × 6
##    turn_event Chamber_Corrected slope_at_turn time_to_peak peak_temp
##         <dbl> <chr>                     <dbl>        <dbl>     <dbl>
##  1        159 C1                        0.547         1.27      59.4
##  2        159 C2                        2.46          4.76      74.4
##  3        159 C3                        0.340         1.93      67.9
##  4        173 C2                       -0.737         7.57      51.8
##  5        173 C3                       -0.110         6.95      59.6
##  6        191 C1                       -0.728         8.4       65.2
##  7        191 C2                      110.            0        138. 
##  8        208 C1                        0.391         0.74      59.4
##  9        208 C2                        4.96          2.98      53.3
## 10        208 C3                        1.65          1.45      61.4
## # ℹ 11 more rows
## # ℹ 1 more variable: pre_turn_mean <dbl>

# Plot 1 - slope at turn
p1 <- ggplot(poly_metrics,
             aes(x = turn_event, y = slope_at_turn,
                 colour = Chamber_Corrected,
                 group  = Chamber_Corrected)) +
  geom_hline(yintercept = 0, linetype = "dashed", 
             colour = "grey50", linewidth = 0.4) +
  geom_point(size = 2.5) +
  geom_line(linewidth = 0.6, alpha = 0.7) +
  geom_smooth(aes(group = 1), method = "lm",
              se = TRUE, colour = "black",
              linewidth = 0.8, linetype = "dashed") +
  scale_colour_manual(values = chamber_cols) +
  theme_bw(base_size = 11) +
  theme(legend.position  = "top",
        panel.grid.minor = element_blank(),
        panel.grid.major = element_line(colour = "grey92", linewidth = 0.3)) +
  labs(x = "Day of year (turning event)",
       y = "Slope at turn (°C/day)",
       colour = "Chamber",
       title = "Rate of temperature change at moment of turning")

# Plot 2 - time to peak
p2 <- ggplot(poly_metrics,
             aes(x = turn_event, y = time_to_peak,
                 colour = Chamber_Corrected,
                 group  = Chamber_Corrected)) +
  geom_point(size = 2.5) +
  geom_line(linewidth = 0.6, alpha = 0.7) +
  geom_smooth(aes(group = 1), method = "lm",
              se = TRUE, colour = "black",
              linewidth = 0.8, linetype = "dashed") +
  scale_colour_manual(values = chamber_cols) +
  theme_bw(base_size = 11) +
  theme(legend.position  = "none",
        panel.grid.minor = element_blank(),
        panel.grid.major = element_line(colour = "grey92", linewidth = 0.3)) +
  labs(x = "Day of year (turning event)",
       y = "Days to peak temperature",
       colour = "Chamber",
       title = "Time to peak temperature after turning")

# Plot 3 - peak temp
p3 <- ggplot(poly_metrics,
             aes(x = turn_event, y = peak_temp,
                 colour = Chamber_Corrected,
                 group  = Chamber_Corrected)) +
  geom_point(size = 2.5) +
  geom_line(linewidth = 0.6, alpha = 0.7) +
  geom_smooth(aes(group = 1), method = "lm",
              se = TRUE, colour = "black",
              linewidth = 0.8, linetype = "dashed") +
  scale_colour_manual(values = chamber_cols) +
  theme_bw(base_size = 11) +
  theme(legend.position  = "none",
        panel.grid.minor = element_blank(),
        panel.grid.major = element_line(colour = "grey92", linewidth = 0.3)) +
  labs(x = "Day of year (turning event)",
       y = "Peak temperature (°C)",
       colour = "Chamber",
       title = "Peak temperature reached after turning")

library(patchwork)
p1 / p2 / p3

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

Turning events looking at temp similar analysis to the above needs to be worked on

# Step 1 - Daily averages with minimum 5 obs per day
daily_avg_turns <- control_turns %>%
  filter(Chamber_Corrected %in% c("C1", "C2", "C3"),
         turn_event %in% all_turns_sorted,
         days_post_turn >= -2) %>%
  mutate(day_bin = floor(days_post_turn)) %>%
  group_by(turn_event, Chamber_Corrected, day_bin) %>%
  summarise(
    daily_mean = mean(pile_temp, na.rm = TRUE),
    n_obs      = n(),
    .groups    = "drop"
  ) %>%
  filter(n_obs >= 5)  # minimum 5 obs per day

peak_metrics <- daily_avg_turns %>%
  group_by(turn_event, Chamber_Corrected) %>%
  summarise(
    # Use first available post-turn day as reference
    first_post_turn_day  = min(day_bin[day_bin >= 1], na.rm = TRUE),
    temp_at_turn         = daily_mean[day_bin == first_post_turn_day],
    peak_day             = day_bin[which.max(ifelse(day_bin >= 1, daily_mean, NA))],
    peak_temp            = max(daily_mean[day_bin >= 1], na.rm = TRUE),
    # Peak must be after first available day
    time_to_peak         = ifelse(peak_day > first_post_turn_day & 
                                    peak_temp > temp_at_turn,
                                  peak_day - first_post_turn_day, NA_real_),
    slope                = (peak_temp - temp_at_turn) / time_to_peak,
    spike_magnitude      = peak_temp - temp_at_turn,
    .groups = "drop"
  )

## Warning: There was 1 warning in `summarise()`.
## ℹ In argument: `peak_temp = max(daily_mean[day_bin >= 1], na.rm = TRUE)`.
## ℹ In group 11: `turn_event = 191` `Chamber_Corrected = "C2"`.
## Caused by warning in `max()`:
## ! no non-missing arguments to max; returning -Inf

## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
##   always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

peak_metrics %>%
  filter(Chamber_Corrected == "C1") %>%
  select(turn_event, first_post_turn_day, temp_at_turn, 
         peak_day, peak_temp, time_to_peak)

## # A tibble: 11 × 6
##    turn_event first_post_turn_day temp_at_turn peak_day peak_temp time_to_peak
##         <dbl>               <dbl>        <dbl>    <dbl>     <dbl>        <dbl>
##  1        159                   1         59.2        2      60.3            1
##  2        173                   1         51.8        1      51.8           NA
##  3        186                   2         58.2       11      65.8            9
##  4        191                   1         50.5        6      65.8            5
##  5        208                   1         57.2        2      59.5            1
##  6        222                   1         49.8        3      54.3            2
##  7        234                   1         45.8        8      57.1            7
##  8        249                   1         55.1        2      58.2            1
##  9        262                   2         56.9        9      59.3            7
## 10        276                   1         49.7        4      61.8            3
## 11        290                   1         48.9        6      55.2            5

peak_metrics

## # A tibble: 32 × 9
##    turn_event Chamber_Corrected first_post_turn_day temp_at_turn peak_day
##         <dbl> <chr>                           <dbl>        <dbl>    <dbl>
##  1        159 C1                                  1         59.2        2
##  2        159 C2                                  1         75.3        2
##  3        159 C3                                  1         69.6        1
##  4        173 C1                                  1         51.8        1
##  5        173 C2                                  1         45.3        3
##  6        173 C3                                  1         54.2        4
##  7        186 C1                                  2         58.2       11
##  8        186 C2                                  2         68.4        3
##  9        186 C3                                  6         61.5        6
## 10        191 C1                                  1         50.5        6
## # ℹ 22 more rows
## # ℹ 4 more variables: peak_temp <dbl>, time_to_peak <dbl>, slope <dbl>,
## #   spike_magnitude <dbl>

c1_daily <- daily_avg_turns %>%
  filter(Chamber_Corrected == "C1") %>%
  left_join(peak_metrics %>%
              filter(Chamber_Corrected == "C1") %>%
              select(turn_event, peak_day, time_to_peak),
            by = "turn_event")

ggplot(c1_daily,
       aes(x = day_bin, y = daily_mean)) +
  geom_vline(xintercept = 0, linetype = "dashed",
             colour = "grey40", linewidth = 0.4) +
  geom_point(size = 2.5,
             colour = chamber_cols[["C1"]],
             alpha = 0.8) +
  geom_line(linewidth = 0.6,
            colour = chamber_cols[["C1"]],
            alpha = 0.7) +
  geom_point(data = c1_daily %>%
               filter(!is.na(time_to_peak) & day_bin == peak_day),
             aes(x = day_bin, y = daily_mean),
             colour = "black", size = 4, shape = 8) +
  facet_wrap(~ turn_event,
             labeller = labeller(turn_event = function(x) paste("DOY", x)),
             ncol = 3) +
  theme_bw(base_size = 11) +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(colour = "grey92", linewidth = 0.3),
    legend.position  = "none"
  ) +
  labs(x     = "Days relative to turning event",
       y     = expression("Temperature (°C)"),
       title = "C1 — Daily mean temperature per turning event (★ = peak)")

# Add turn number
turn_order <- tibble(
  turn_event = sort(c(turns_both_red, turns_control_black)),
  turn_number = 1:length(sort(c(turns_both_red, turns_control_black)))
)

peak_metrics_plot <- peak_metrics %>%
  left_join(turn_order, by = "turn_event") %>%
  filter(Chamber_Corrected %in% c("C1", "C2", "C3"))

# Plot spike magnitude by turn number
ggplot(peak_metrics_plot %>% filter(!is.na(spike_magnitude)),
       aes(x = turn_number, y = spike_magnitude,
           colour = Chamber_Corrected,
           group  = Chamber_Corrected)) +
  geom_point(size = 2.5) +
  geom_line(linewidth = 0.6, alpha = 0.7) +
  geom_smooth(aes(group = 1), method = "lm",
              se = TRUE, colour = "black",
              linewidth = 0.8, linetype = "dashed") +
  scale_colour_manual(values = chamber_cols) +
  scale_x_continuous(breaks = 1:11) +
  theme_bw(base_size = 11) +
  theme(
    legend.position  = "top",
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(colour = "grey92", linewidth = 0.3)
  ) +
  labs(x      = "Turning event number",
       y      = "Spike magnitude (°C)",
       colour = "Chamber",
       title  = "Post-turn temperature spike magnitude across composting cycle")

## `geom_smooth()` using formula = 'y ~ x'

ggplot(peak_metrics_plot %>% filter(!is.na(peak_temp)),
       aes(x = turn_number, y = peak_temp,
           colour = Chamber_Corrected,
           group  = Chamber_Corrected)) +
  geom_point(size = 2.5) +
  geom_line(linewidth = 0.6, alpha = 0.7) +
  geom_smooth(aes(group = 1), method = "lm",
              se = TRUE, colour = "black",
              linewidth = 0.8, linetype = "dashed") +
  scale_colour_manual(values = chamber_cols) +
  scale_x_continuous(breaks = 1:11) +
  theme_bw(base_size = 11) +
  theme(
    legend.position  = "top",
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(colour = "grey92", linewidth = 0.3)
  ) +
  labs(x      = "Turning event number",
       y      = "Peak temperature post-turn (°C)",
       colour = "Chamber",
       title  = "Peak temperature reached after each turning event")

## `geom_smooth()` using formula = 'y ~ x'

# Linear model of peak temp vs turn number

lm_peak <- lm(peak_temp ~ turn_number, 
              data = peak_metrics_plot %>% filter(!is.na(peak_temp)))

summary(lm_peak)

## 
## Call:
## lm(formula = peak_temp ~ turn_number, data = peak_metrics_plot %>% 
##     filter(!is.na(peak_temp)))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -20.1684  -3.6843   0.5645   5.1073  13.2625 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  69.2767     3.0394  22.793  < 2e-16 ***
## turn_number  -2.0777     0.4436  -4.683 5.69e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.009 on 30 degrees of freedom
## Multiple R-squared:  0.4223, Adjusted R-squared:  0.4031 
## F-statistic: 21.93 on 1 and 30 DF,  p-value: 5.689e-05

Plotting figures which show each chambers time in each zone and measuring this maturation phase

# Zone colours
zone_cols <- c(
  "Mesophilic"         = "#2166ac",
  "Transition"         = "#74add1", 
  "Thermophilic"       = "#f46d43",
  "Hyper-thermophilic" = "#a50026"
)

# Add zone classification to Cleaned_Data
Cleaned_Data <- Cleaned_Data %>%
  mutate(
    temp_zone = case_when(
      pile_temp < 40                    ~ "Mesophilic",
      pile_temp >= 40 & pile_temp < 45  ~ "Transition",
      pile_temp >= 45 & pile_temp <= 70 ~ "Thermophilic",
      pile_temp > 70                    ~ "Hyper-thermophilic",
      TRUE                              ~ NA_character_
    ),
    temp_zone = factor(temp_zone,
                       levels = c("Mesophilic", "Transition",
                                  "Thermophilic", "Hyper-thermophilic"))
  )


# Build individual chamber zone plots
zone_plots <- Cleaned_Data %>%
  split(.$Chamber_Corrected) %>%
  imap(~ ggplot(.x, aes(x = DOY, y = pile_temp, colour = temp_zone)) +
         geom_point(size = 0.6, alpha = 0.4) +
         geom_hline(yintercept = c(40, 45, 70),
                    linetype = "dashed",
                    colour   = "grey40",
                    linewidth = 0.3) +
         geom_vline(xintercept = turns_both_red,
                    colour    = "#9b1c31",
                    linetype  = "solid",
                    linewidth = 0.3,
                    alpha     = 0.6) +
         geom_vline(xintercept = turns_control_black,
                    colour    = "#000000",
                    linetype  = "dashed",
                    linewidth = 0.3,
                    alpha     = 0.6) +
         scale_colour_manual(values = zone_cols, na.value = "grey80") +
         theme_bw(base_size = 11) +
         theme(
           legend.position  = "none",
           panel.grid.minor = element_blank(),
           panel.grid.major = element_line(colour = "grey92", linewidth = 0.3)
         ) +
         labs(x      = "Day of year",
              y      = expression("Temperature (°C)"),
              title  = .y)
  )

# Shared legend
legend_plot <- ggplot(Cleaned_Data %>% filter(!is.na(temp_zone)),
                      aes(x = DOY, y = pile_temp, colour = temp_zone)) +
  geom_point() +
  scale_colour_manual(values = zone_cols, name = "Zone") +
  theme(legend.position = "top") +
  guides(colour = guide_legend(override.aes = list(size = 3, alpha = 1)))

shared_legend <- cowplot::get_legend(legend_plot)

# Arrange
left_col  <- zone_plots[["C1"]] / zone_plots[["C2"]] / zone_plots[["C3"]]
right_col <- zone_plots[["C4"]] / zone_plots[["C5"]] / zone_plots[["C6"]]

fig_zones_individual <- wrap_elements(shared_legend) / 
                        (left_col | right_col) +
                        plot_layout(heights = c(0.05, 1))

fig_zones_individual

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 776 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 266 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 1368 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 62 rows containing missing values or values outside the scale range
## (`geom_point()`).

# ============================================================
# ZONE SUMMARY WITH MISSING DATA - BY CHAMBER
# ============================================================

zone_summary_chamber <- Cleaned_Data %>%
  group_by(Chamber_Corrected, Pile) %>%
  summarise(
    n_total    = n(),
    n_missing  = sum(is.na(temp_zone)),
    n_valid    = sum(!is.na(temp_zone)),
    pct_missing = round(n_missing / n_total * 100, 1),
    # Zone percentages as % of VALID observations
    pct_meso   = round(sum(temp_zone == "Mesophilic",    na.rm = TRUE) / n_valid * 100, 1),
    pct_trans  = round(sum(temp_zone == "Transition",    na.rm = TRUE) / n_valid * 100, 1),
    pct_thermo = round(sum(temp_zone == "Thermophilic",  na.rm = TRUE) / n_valid * 100, 1),
    pct_hyper  = round(sum(temp_zone == "Hyper-thermophilic", na.rm = TRUE) / n_valid * 100, 1),
    .groups = "drop"
  ) %>%
  mutate(Pile = ifelse(Pile == "C", "Control", "Experimental")) %>%
  arrange(Pile, Chamber_Corrected)

# ============================================================
# ZONE SUMMARY WITH MISSING DATA - BY PILE
# ============================================================

zone_summary_pile <- Cleaned_Data %>%
  mutate(Pile = ifelse(Pile == "C", "Control", "Experimental")) %>%
  group_by(Pile) %>%
  summarise(
    n_total     = n(),
    n_missing   = sum(is.na(temp_zone)),
    n_valid     = sum(!is.na(temp_zone)),
    pct_missing = round(n_missing / n_total * 100, 1),
    pct_meso    = round(sum(temp_zone == "Mesophilic",         na.rm = TRUE) / n_valid * 100, 1),
    pct_trans   = round(sum(temp_zone == "Transition",         na.rm = TRUE) / n_valid * 100, 1),
    pct_thermo  = round(sum(temp_zone == "Thermophilic",       na.rm = TRUE) / n_valid * 100, 1),
    pct_hyper   = round(sum(temp_zone == "Hyper-thermophilic", na.rm = TRUE) / n_valid * 100, 1),
    .groups = "drop"
  ) %>%
  mutate(Chamber_Corrected = Pile)

# ============================================================
# COMBINE AND RENDER
# ============================================================

combined_table <- bind_rows(
  zone_summary_chamber %>%
    mutate(row_type = "chamber"),
  zone_summary_pile %>%
    mutate(
      Chamber_Corrected = "",
      row_type = "pile"
    )
) %>%
  arrange(Pile, row_type) %>%
  mutate(row_id = row_number())  # add row index before select

# Identify pile row numbers
pile_rows <- which(combined_table$row_type == "pile")

combined_table %>%
  select(Pile, Chamber_Corrected, n_total, n_valid,
         pct_missing, pct_meso, pct_trans, pct_thermo, pct_hyper) %>%
  gt() %>%
  cols_label(
    Pile                = "Pile",
    Chamber_Corrected   = "Chamber",
    n_total             = "Total Obs.",
    n_valid             = "Valid Obs.",
    pct_missing         = "% Missing",
    pct_meso            = "Mesophilic",
    pct_trans           = "Transition",
    pct_thermo          = "Thermophilic",
    pct_hyper           = "Hyper-therm."
  ) %>%
  tab_spanner(
    label   = "Temperature Zone (% of valid obs.)",
    columns = c(pct_meso, pct_trans, pct_thermo, pct_hyper)
  ) %>%
  tab_spanner(
    label   = "Data Quality",
    columns = c(n_total, n_valid, pct_missing)
  ) %>%
  fmt_number(
    columns  = c(pct_missing, pct_meso, pct_trans, pct_thermo, pct_hyper),
    decimals = 1,
    suffix   = "%"
  ) %>%
  fmt_number(
    columns  = c(n_total, n_valid),
    decimals = 0,
    use_seps = TRUE
  ) %>%
  tab_row_group(
    label = "Experimental Pile (C4–C6)",
    rows  = Pile == "Experimental"
  ) %>%
  tab_row_group(
    label = "Control Pile (C1–C3)",
    rows  = Pile == "Control"
  ) %>%
  tab_style(
    style     = cell_fill(color = "#f7f7f7"),
    locations = cells_body(rows = pile_rows)
  ) %>%
  tab_style(
    style     = cell_text(weight = "bold"),
    locations = cells_body(rows = pile_rows)
  ) %>%
  tab_style(
    style     = cell_text(weight = "bold"),
    locations = cells_column_labels()
  ) %>%
  tab_style(
    style     = cell_fill(color = "#ffe0e0"),
    locations = cells_body(
      columns = pct_missing,
      rows    = pct_missing > 15
    )
  ) %>%
  tab_header(
    title    = "Time Spent in Each Temperature Zone",
    subtitle = "By chamber and pile — percentages of valid observations with missing data reported"
  ) %>%
  tab_footnote(
    footnote  = "Zone percentages calculated from valid observations only. Pile rows (bold) show aggregated values across all chambers. Missing data highlighted in red where >15% of observations are missing.",
    locations = cells_title()
  ) %>%
  opt_table_font(font = "Times New Roman")

Pile	Chamber	Data Quality			Temperature Zone (% of valid obs.)
Time Spent in Each Temperature Zone¹
By chamber and pile — percentages of valid observations with missing data reported¹
Pile	Chamber	Total Obs.	Valid Obs.	% Missing	Mesophilic	Transition	Thermophilic	Hyper-therm.
Control Pile (C1–C3)
Control	C1	3,203	3,202	0.0	1.0	2.2	96.7	0.1
Control	C2	3,663	2,887	21.2	8.9	10.4	69.6	11.1
Control	C3	3,108	3,103	0.2	9.3	14.1	75.1	1.6
Control		9,974	9,192	7.8	6.3	8.8	80.9	4.0
Experimental Pile (C4–C6)
Experimental	C4	3,624	3,358	7.3	26.0	1.3	68.9	3.8
Experimental	C5	3,695	2,327	37.0	0.0	8.2	84.7	7.0
Experimental	C6	3,797	3,735	1.6	16.4	19.9	62.6	1.2
Experimental		11,116	9,420	15.3	15.8	10.4	70.3	3.5
¹ Zone percentages calculated from valid observations only. Pile rows (bold) show aggregated values across all chambers. Missing data highlighted in red where >15% of observations are missing.

# ============================================================
# STEP 1 - Daily averages per chamber
# A valid day requires >= 5 raw observations
# ============================================================

# Get study period range
study_start <- min(Cleaned_Data$DOY, na.rm = TRUE)
study_end   <- max(Cleaned_Data$DOY, na.rm = TRUE)
all_days    <- study_start:study_end

# Daily summary per chamber
daily_chamber <- Cleaned_Data %>%
  mutate(DOY_day = floor(DOY)) %>%
  group_by(Chamber_Corrected, Pile, DOY_day) %>%
  summarise(
    n_obs      = n(),
    daily_mean = mean(pile_temp, na.rm = TRUE),
    .groups    = "drop"
  ) %>%
  mutate(
    valid     = n_obs >= 5,
    daily_mean = ifelse(valid, daily_mean, NA_real_),
    # Classify zone from daily mean
    temp_zone = case_when(
      daily_mean < 40                       ~ "Mesophilic",
      daily_mean >= 40 & daily_mean < 45    ~ "Transition",
      daily_mean >= 45 & daily_mean <= 70   ~ "Thermophilic",
      daily_mean > 70                       ~ "Hyper-thermophilic",
      TRUE                                  ~ NA_character_
    ),
    temp_zone = factor(temp_zone,
                       levels = c("Mesophilic", "Transition",
                                  "Thermophilic", "Hyper-thermophilic"))
  )

# Add missing days — days with no data at all
all_chamber_days <- expand.grid(
  Chamber_Corrected = unique(Cleaned_Data$Chamber_Corrected),
  DOY_day           = all_days
) %>%
  left_join(
    Cleaned_Data %>% 
      select(Chamber_Corrected, Pile) %>% 
      distinct(),
    by = "Chamber_Corrected"
  )

daily_chamber_full <- all_chamber_days %>%
  left_join(daily_chamber, by = c("Chamber_Corrected", "Pile", "DOY_day")) %>%
  mutate(
    valid     = ifelse(is.na(valid), FALSE, valid),
    n_obs     = ifelse(is.na(n_obs), 0L, n_obs)
  )

# ============================================================
# STEP 2 - Daily averages per pile
# Average valid chamber means per day
# ============================================================

daily_pile <- daily_chamber_full %>%
  mutate(Pile = ifelse(Pile == "C", "Control", "Experimental")) %>%
  group_by(Pile, DOY_day) %>%
  summarise(
    n_chambers_valid = sum(valid, na.rm = TRUE),
    pile_daily_mean  = mean(daily_mean, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    pile_daily_mean = ifelse(n_chambers_valid == 0, NA_real_, pile_daily_mean),
    valid = n_chambers_valid > 0,
    temp_zone = case_when(
      pile_daily_mean < 40                          ~ "Mesophilic",
      pile_daily_mean >= 40 & pile_daily_mean < 45  ~ "Transition",
      pile_daily_mean >= 45 & pile_daily_mean <= 70 ~ "Thermophilic",
      pile_daily_mean > 70                          ~ "Hyper-thermophilic",
      TRUE                                          ~ NA_character_
    ),
    temp_zone = factor(temp_zone,
                       levels = c("Mesophilic", "Transition",
                                  "Thermophilic", "Hyper-thermophilic"))
  )

# ============================================================
# STEP 3 - Zone summary by chamber
# ============================================================

zone_summary_chamber <- daily_chamber_full %>%
  mutate(Pile = ifelse(Pile == "C", "Control", "Experimental")) %>%
  group_by(Chamber_Corrected, Pile) %>%
  summarise(
    n_days_total   = n(),
    n_days_missing = sum(!valid),
    n_days_valid   = sum(valid),
    pct_missing    = round(n_days_missing / n_days_total * 100, 1),
    pct_meso       = round(sum(temp_zone == "Mesophilic",          na.rm = TRUE) / n_days_valid * 100, 1),
    pct_trans      = round(sum(temp_zone == "Transition",          na.rm = TRUE) / n_days_valid * 100, 1),
    pct_thermo     = round(sum(temp_zone == "Thermophilic",        na.rm = TRUE) / n_days_valid * 100, 1),
    pct_hyper      = round(sum(temp_zone == "Hyper-thermophilic",  na.rm = TRUE) / n_days_valid * 100, 1),
    .groups = "drop"
  ) %>%
  arrange(Pile, Chamber_Corrected)

# ============================================================
# STEP 4 - Zone summary by pile
# ============================================================

zone_summary_pile <- daily_pile %>%
  group_by(Pile) %>%
  summarise(
    n_days_total   = n(),
    n_days_missing = sum(!valid),
    n_days_valid   = sum(valid),
    pct_missing    = round(n_days_missing / n_days_total * 100, 1),
    pct_meso       = round(sum(temp_zone == "Mesophilic",         na.rm = TRUE) / n_days_valid * 100, 1),
    pct_trans      = round(sum(temp_zone == "Transition",         na.rm = TRUE) / n_days_valid * 100, 1),
    pct_thermo     = round(sum(temp_zone == "Thermophilic",       na.rm = TRUE) / n_days_valid * 100, 1),
    pct_hyper      = round(sum(temp_zone == "Hyper-thermophilic", na.rm = TRUE) / n_days_valid * 100, 1),
    .groups = "drop"
  ) %>%
  mutate(Chamber_Corrected = "")

# ============================================================
# STEP 5 - Combine and render table
# ============================================================

combined_table <- bind_rows(
  zone_summary_chamber %>% mutate(row_type = "chamber"),
  zone_summary_pile    %>% mutate(row_type = "pile")
) %>%
  arrange(Pile, row_type) %>%
  mutate(row_id = row_number())

pile_rows <- which(combined_table$row_type == "pile")

combined_table %>%
  mutate(
    # Combine pile and chamber into one label
    Label = case_when(
      row_type == "pile"    ~ paste0(Pile, " (mean)"),
      TRUE                  ~ Chamber_Corrected
    )
  ) %>%
  select(Label, Pile, n_days_total, n_days_valid,
         pct_missing, pct_meso, pct_trans, pct_thermo, pct_hyper) %>%
  gt() %>%
  cols_label(
    Label          = "Chamber",
    n_days_total   = "Days",
    n_days_valid   = "Valid",
    pct_missing    = "Missing (%)",
    pct_meso       = "Meso.",
    pct_trans      = "Trans.",
    pct_thermo     = "Thermo.",
    pct_hyper      = "Hyper."
  ) %>%
  cols_hide(Pile) %>%
  tab_spanner(
    label   = "Zone (% valid days)",
    columns = c(pct_meso, pct_trans, pct_thermo, pct_hyper)
  ) %>%
  fmt_number(
    columns  = c(pct_missing, pct_meso, pct_trans, pct_thermo, pct_hyper),
    decimals = 1
  ) %>%
  fmt_number(
    columns  = c(n_days_total, n_days_valid),
    decimals = 0,
    use_seps = FALSE
  ) %>%
  tab_row_group(
    label = "Experimental (C4–C6)",
    rows  = Pile == "Experimental"
  ) %>%
  tab_row_group(
    label = "Control (C1–C3)",
    rows  = Pile == "Control"
  ) %>%
  tab_style(
    style     = cell_fill(color = "#f7f7f7"),
    locations = cells_body(rows = pile_rows)
  ) %>%
  tab_style(
    style     = cell_text(weight = "bold"),
    locations = cells_body(rows = pile_rows)
  ) %>%
  tab_style(
    style     = cell_text(weight = "bold"),
    locations = cells_column_labels()
  ) %>%
  tab_style(
    style     = cell_fill(color = "#ffe0e0"),
    locations = cells_body(
      columns = pct_missing,
      rows    = pct_missing > 100
    )
  ) %>%
  tab_header(
    title    = "Temperature Zone Classification by Chamber and Pile",
    subtitle = "Daily mean temperature — days with <5 observations excluded"
  ) %>%
  tab_footnote(
    footnote  = "Meso. = Mesophilic (<40°C); Trans. = Transition (40–45°C); Thermo. = Thermophilic (45–70°C); Hyper. = Hyper-thermophilic (>70°C). Bold rows show pile averages. Missing threshold raised to 20%.",
    locations = cells_title()
  ) %>%
  opt_table_font(font = "Times New Roman") %>%
  tab_options(table.width = pct(100))

Chamber	Days	Valid	Missing (%)	Zone (% valid days)
Temperature Zone Classification by Chamber and Pile¹
Daily mean temperature — days with <5 observations excluded¹
Chamber	Days	Valid	Missing (%)	Meso.	Trans.	Thermo.	Hyper.
Control (C1–C3)
C1	145	111	23.4	1.8	1.8	96.4	0.0
C2	145	113	22.1	10.6	8.0	60.2	7.1
C3	145	108	25.5	9.3	15.7	74.1	0.9
Control (mean)	145	121	16.6	2.5	8.3	89.3	0.0
Experimental (C4–C6)
C4	145	113	22.1	30.1	1.8	58.4	3.5
C5	145	119	17.9	0.0	5.9	47.9	3.4
C6	145	120	17.2	11.7	20.0	67.5	0.8
Experimental (mean)	145	121	16.6	16.5	9.1	71.1	3.3
¹ Meso. = Mesophilic (<40°C); Trans. = Transition (40–45°C); Thermo. = Thermophilic (45–70°C); Hyper. = Hyper-thermophilic (>70°C). Bold rows show pile averages. Missing threshold raised to 20%.

# ============================================================
# PLOT - Zone by DOY daily averages per chamber
# ============================================================

# Add pile label to daily_chamber_full
daily_chamber_plot <- daily_chamber_full %>%
  mutate(Pile = ifelse(Pile == "C", "Control", "Experimental")) %>%
  filter(valid)  # only valid days

# Individual chamber plots
chamber_zone_plots <- daily_chamber_plot %>%
  split(.$Chamber_Corrected) %>%
  imap(~ ggplot(.x, aes(x = DOY_day, y = daily_mean, colour = temp_zone)) +
         geom_point(size = 1.5, alpha = 0.8) +
         geom_hline(yintercept = c(40, 45, 70),
                    linetype  = "dashed",
                    colour    = "grey40",
                    linewidth = 0.3) +
         geom_vline(xintercept = turns_both_red,
                    colour    = "#9b1c31",
                    linetype  = "solid",
                    linewidth = 0.3,
                    alpha     = 0.6) +
         geom_vline(xintercept = turns_control_black,
                    colour    = "#000000",
                    linetype  = "dashed",
                    linewidth = 0.3,
                    alpha     = 0.6) +
         scale_colour_manual(values = zone_cols, na.value = "grey80",
                             drop = FALSE) +
         theme_bw(base_size = 11) +
         theme(
           legend.position  = "none",
           panel.grid.minor = element_blank(),
           panel.grid.major = element_line(colour = "grey92", linewidth = 0.3)
         ) +
         labs(x     = "Day of year",
              y     = expression("Daily mean temperature (°C)"),
              title = .y)
  )

# Pile average plots
pile_zone_plots <- daily_pile %>%
  filter(valid) %>%
  split(.$Pile) %>%
  imap(~ ggplot(.x, aes(x = DOY_day, y = pile_daily_mean, colour = temp_zone)) +
         geom_point(size = 1.5, alpha = 0.8) +
         geom_hline(yintercept = c(40, 45, 70),
                    linetype  = "dashed",
                    colour    = "grey40",
                    linewidth = 0.3) +
         geom_vline(xintercept = turns_both_red,
                    colour    = "#9b1c31",
                    linetype  = "solid",
                    linewidth = 0.3,
                    alpha     = 0.6) +
         geom_vline(xintercept = turns_control_black,
                    colour    = "#000000",
                    linetype  = "dashed",
                    linewidth = 0.3,
                    alpha     = 0.6) +
         scale_colour_manual(values = zone_cols, na.value = "grey80",
                             drop = FALSE) +
         theme_bw(base_size = 11) +
         theme(
           legend.position  = "none",
           panel.grid.minor = element_blank(),
           panel.grid.major = element_line(colour = "grey92", linewidth = 0.3)
         ) +
         labs(x     = "Day of year",
              y     = expression("Daily mean temperature (°C)"),
              title = paste(.y, "pile average"))
  )

# Shared legend
legend_plot <- ggplot(daily_chamber_plot %>% filter(!is.na(temp_zone)),
                      aes(x = DOY_day, y = daily_mean, colour = temp_zone)) +
  geom_point() +
  scale_colour_manual(values = zone_cols, name = "Zone", drop = FALSE) +
  theme(legend.position = "top") +
  guides(colour = guide_legend(override.aes = list(size = 3, alpha = 1)))

shared_legend <- cowplot::get_legend(legend_plot)

# ============================================================
# ARRANGE - chambers left/right, pile averages at bottom
# ============================================================

left_col  <- chamber_zone_plots[["C1"]] / 
             chamber_zone_plots[["C2"]] / 
             chamber_zone_plots[["C3"]]

right_col <- chamber_zone_plots[["C4"]] / 
             chamber_zone_plots[["C5"]] / 
             chamber_zone_plots[["C6"]]

pile_row  <- pile_zone_plots[["Control"]] | 
             pile_zone_plots[["Experimental"]]

fig_zones_daily <- wrap_elements(shared_legend) /
                   (left_col | right_col) /
                   pile_row +
                   plot_layout(heights = c(0.05, 1, 0.35))

fig_zones_daily

## Warning: Removed 16 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 7 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 51 rows containing missing values or values outside the scale range
## (`geom_point()`).

Had Error as outdir not found but this was to export tables

2024 data - looking at transformations in 2024 chem data

Code_for_Figures_11_04_2026

Daniel Kaufman

2026-4-11

adding chamber elevation to the model as this impacts flux magnitude and does not have to do with mechanism or treatment: