files_today <- c(
  "GreenFeed_Cleaned_K1K2.xlsx",
  "GreenFeed_Cleaned_K1K2.csv",
  "3SD_qualified.xlsx",
  "raw_data_for_diagnostics.csv",
  "normality_by_treatment_shapiro.csv",
  "homogeneity_of_variance_tests.csv",
  "french_covariate_screening_results.csv",
  "lsmeans_CH4_by_cow.csv",
  "lsmeans_CO2_by_cow.csv",
  "correlation_results.csv"
)

existing <- file.exists(files_today)
kable(data.frame(file = files_today, exists = existing), caption = "Analysis files in working folder")

Analysis files in working folder
file	exists
GreenFeed_Cleaned_K1K2.xlsx	TRUE
GreenFeed_Cleaned_K1K2.csv	TRUE
3SD_qualified.xlsx	TRUE
raw_data_for_diagnostics.csv	TRUE
normality_by_treatment_shapiro.csv	TRUE
homogeneity_of_variance_tests.csv	TRUE
french_covariate_screening_results.csv	TRUE
lsmeans_CH4_by_cow.csv	TRUE
lsmeans_CO2_by_cow.csv	TRUE
correlation_results.csv	TRUE

# Greenfeed usage K1 vs K2
library(readr)
library(readxl)
library(dplyr)

# -----------------------------
# Numerator: cows with GreenFeed records after 3SD
# -----------------------------
after3sd <- read_csv("analysis_3SD_enriched.csv", show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = toupper(as.character(grazing_treatment))
  ) %>%
  distinct(cow_id, grazing_treatment)

# -----------------------------
# Denominator: all study cows (from age list)
# -----------------------------
all_cows <- read_excel(
  "/Users/dolapo/Library/CloudStorage/GoogleDrive-daadepoj@ualberta.ca/Other computers/Dell/Academics/Python/ALES_Analysis/ALES_codex/K1_K2_Cows_With_Age.xlsx",
  sheet = "K1_K2_Age_Data"
) %>%
  transmute(
    cow_id = as.character(cow_id),
    grazing_treatment = toupper(as.character(grazing_treatment))
  ) %>%
  distinct(cow_id, grazing_treatment)

# -----------------------------
# % cows that used GreenFeed (after 3SD)
# -----------------------------
usage_tbl <- all_cows %>%
  left_join(after3sd %>% mutate(used_greenfeed = 1L), by = c("cow_id","grazing_treatment")) %>%
  mutate(used_greenfeed = ifelse(is.na(used_greenfeed), 0L, used_greenfeed)) %>%
  group_by(grazing_treatment) %>%
  summarise(
    n_total_cows = n_distinct(cow_id),
    n_used_greenfeed_after3SD = sum(used_greenfeed),
    pct_used_greenfeed_after3SD = 100 * n_used_greenfeed_after3SD / n_total_cows,
    .groups = "drop"
  )

overall_tbl <- usage_tbl %>%
  summarise(
    grazing_treatment = "Overall",
    n_total_cows = sum(n_total_cows),
    n_used_greenfeed_after3SD = sum(n_used_greenfeed_after3SD),
    pct_used_greenfeed_after3SD = 100 * n_used_greenfeed_after3SD / n_total_cows
  )

print(usage_tbl)

## # A tibble: 2 × 4
##   grazing_treatment n_total_cows n_used_greenfeed_after…¹ pct_used_greenfeed_a…²
##   <chr>                    <int>                    <int>                  <dbl>
## 1 K1                          36                       20                   55.6
## 2 K2                          26                       17                   65.4
## # ℹ abbreviated names: ¹n_used_greenfeed_after3SD, ²pct_used_greenfeed_after3SD

print(overall_tbl)

## # A tibble: 1 × 4
##   grazing_treatment n_total_cows n_used_greenfeed_after…¹ pct_used_greenfeed_a…²
##   <chr>                    <int>                    <int>                  <dbl>
## 1 Overall                     62                       37                   59.7
## # ℹ abbreviated names: ¹n_used_greenfeed_after3SD, ²pct_used_greenfeed_after3SD

write_csv(usage_tbl, "greenfeed_usage_percent_K1_K2_after3SD.csv")
write_csv(overall_tbl, "greenfeed_usage_percent_overall_after3SD.csv")

# Plot for greenfeed usage
# Greenfeed usage K1 vs K2 (100% stacked bar: Used vs No use)
library(readr)
library(readxl)
library(dplyr)
library(ggplot2)

# Numerator: cows with GreenFeed records after 3SD
after3sd <- read_csv("analysis_3SD_enriched.csv", show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = toupper(as.character(grazing_treatment))
  ) %>%
  distinct(cow_id, grazing_treatment)

# Denominator: all study cows
all_cows <- read_excel(
  "/Users/dolapo/Library/CloudStorage/GoogleDrive-daadepoj@ualberta.ca/Other computers/Dell/Academics/Python/ALES_Analysis/ALES_codex/K1_K2_Cows_With_Age.xlsx",
  sheet = "K1_K2_Age_Data"
) %>%
  transmute(
    cow_id = as.character(cow_id),
    grazing_treatment = toupper(as.character(grazing_treatment))
  ) %>%
  distinct(cow_id, grazing_treatment)

# Build usage table for plotting
usage_cow <- all_cows %>%
  left_join(after3sd %>% mutate(usage_status = "Used"), by = c("cow_id","grazing_treatment")) %>%
  mutate(usage_status = ifelse(is.na(usage_status), "No use", "Used"))

plot_df <- usage_cow %>%
  group_by(grazing_treatment, usage_status) %>%
  summarise(n_cows = n(), .groups = "drop") %>%
  group_by(grazing_treatment) %>%
  mutate(
    pct = 100 * n_cows / sum(n_cows),
    usage_status = factor(usage_status, levels = c("Used", "No use"))
  ) %>%
  ungroup()

# Plot
p <- ggplot(plot_df, aes(x = grazing_treatment, y = pct, fill = usage_status)) +
  geom_col(width = 0.68, color = "white", linewidth = 0.8) +
  geom_text(
    aes(label = paste0(round(pct, 1), "%\n(n=", n_cows, ")")),
    position = position_stack(vjust = 0.5),
    size = 4
  ) +
  scale_fill_manual(values = c("Used" = "forestgreen", "No use" = "grey75")) +
  scale_y_continuous(limits = c(0, 100), breaks = seq(0, 100, 20),
                     labels = function(x) paste0(x, "%")) +
  labs(
    title = "GreenFeed Usage by Treatment (After 3SD)",
    x = "Grazing treatment",
    y = "Percentage of cows",
    fill = "Usage"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold"),
    panel.grid.minor = element_blank()
  )

ggsave("GreenFeed_usage_100pct_bar_after3SD.png", p, width = 8, height = 6, dpi = 320)
print(p)

write_csv(plot_df, "GreenFeed_usage_100pct_plotdata_after3SD.csv")

library(readr)
library(dplyr)
library(ggplot2)

# Load after-3SD enriched data
dat <- read_csv("analysis_3SD_enriched.csv", show_col_types = FALSE) %>%
  mutate(
    grazing_treatment = toupper(trimws(grazing_treatment)),
    start_time = as.numeric(start_time),
    visit_date = as.Date("1899-12-30") + floor(start_time)
  ) %>%
  filter(grazing_treatment %in% c("K1", "K2"))

# -----------------------------
# Visits per day by treatment
# -----------------------------
visits_day <- dat %>%
  group_by(visit_date, grazing_treatment) %>%
  summarise(n_visits = n(), .groups = "drop") %>%
  arrange(visit_date, grazing_treatment)

# Print table
print(visits_day, n = 100)

## # A tibble: 139 × 3
##     visit_date grazing_treatment n_visits
##     <date>     <chr>                <int>
##   1 2025-06-27 K2                      13
##   2 2025-06-28 K2                      15
##   3 2025-06-29 K2                      34
##   4 2025-06-30 K1                      21
##   5 2025-06-30 K2                      38
##   6 2025-07-01 K1                      11
##   7 2025-07-01 K2                      27
##   8 2025-07-02 K2                      38
##   9 2025-07-03 K2                      11
##  10 2025-07-04 K1                      20
##  11 2025-07-04 K2                      17
##  12 2025-07-05 K1                      45
##  13 2025-07-05 K2                      21
##  14 2025-07-06 K1                      34
##  15 2025-07-06 K2                      22
##  16 2025-07-07 K1                      38
##  17 2025-07-07 K2                      30
##  18 2025-07-08 K1                      45
##  19 2025-07-08 K2                      28
##  20 2025-07-09 K1                      44
##  21 2025-07-09 K2                      42
##  22 2025-07-10 K1                      25
##  23 2025-07-10 K2                      24
##  24 2025-07-11 K1                      36
##  25 2025-07-11 K2                       1
##  26 2025-07-12 K1                      40
##  27 2025-07-12 K2                       2
##  28 2025-07-13 K1                      22
##  29 2025-07-13 K2                      30
##  30 2025-07-14 K1                      15
##  31 2025-07-14 K2                      23
##  32 2025-07-15 K1                      17
##  33 2025-07-15 K2                      13
##  34 2025-07-16 K1                      10
##  35 2025-07-16 K2                      26
##  36 2025-07-17 K1                      26
##  37 2025-07-17 K2                       6
##  38 2025-07-18 K1                      11
##  39 2025-07-18 K2                       6
##  40 2025-07-19 K2                       8
##  41 2025-07-20 K2                      20
##  42 2025-07-21 K1                       8
##  43 2025-07-21 K2                      17
##  44 2025-07-22 K1                      11
##  45 2025-07-22 K2                       6
##  46 2025-07-23 K1                       6
##  47 2025-07-23 K2                       8
##  48 2025-07-24 K1                      13
##  49 2025-07-24 K2                      32
##  50 2025-07-25 K1                       7
##  51 2025-07-25 K2                      23
##  52 2025-07-26 K1                      28
##  53 2025-07-26 K2                      31
##  54 2025-07-27 K1                      25
##  55 2025-07-27 K2                      16
##  56 2025-07-28 K1                      19
##  57 2025-07-28 K2                      16
##  58 2025-07-29 K1                      26
##  59 2025-07-29 K2                      34
##  60 2025-07-30 K1                      21
##  61 2025-07-30 K2                      23
##  62 2025-07-31 K1                      36
##  63 2025-07-31 K2                      30
##  64 2025-08-01 K1                      33
##  65 2025-08-01 K2                      11
##  66 2025-08-02 K1                      15
##  67 2025-08-02 K2                      23
##  68 2025-08-03 K2                      30
##  69 2025-08-04 K1                       1
##  70 2025-08-04 K2                      26
##  71 2025-08-05 K2                      10
##  72 2025-08-06 K2                      31
##  73 2025-08-07 K2                      27
##  74 2025-08-08 K1                      28
##  75 2025-08-08 K2                      11
##  76 2025-08-09 K1                      44
##  77 2025-08-09 K2                      24
##  78 2025-08-10 K1                      44
##  79 2025-08-10 K2                      35
##  80 2025-08-11 K1                      41
##  81 2025-08-11 K2                      35
##  82 2025-08-12 K1                      55
##  83 2025-08-12 K2                      31
##  84 2025-08-13 K1                      28
##  85 2025-08-13 K2                       8
##  86 2025-08-14 K1                      43
##  87 2025-08-14 K2                      23
##  88 2025-08-15 K1                      37
##  89 2025-08-15 K2                      23
##  90 2025-08-16 K1                      52
##  91 2025-08-16 K2                      33
##  92 2025-08-17 K1                      42
##  93 2025-08-17 K2                       9
##  94 2025-08-18 K1                      35
##  95 2025-08-18 K2                      25
##  96 2025-08-19 K1                      35
##  97 2025-08-19 K2                      29
##  98 2025-08-20 K1                      37
##  99 2025-08-20 K2                      26
## 100 2025-08-21 K1                      37
## # ℹ 39 more rows

# Save
write_csv(visits_day, "visits_per_day_K1_K2_after3SD.csv")

# -----------------------------
# Daily summary stats by treatment
# -----------------------------
visits_summary <- visits_day %>%
  group_by(grazing_treatment) %>%
  summarise(
    n_days = n(),
    mean_visits_per_day = mean(n_visits, na.rm = TRUE),
    sd_visits_per_day = sd(n_visits, na.rm = TRUE),
    min_visits_per_day = min(n_visits, na.rm = TRUE),
    max_visits_per_day = max(n_visits, na.rm = TRUE),
    .groups = "drop"
  )

print(visits_summary)

## # A tibble: 2 × 6
##   grazing_treatment n_days mean_visits_per_day sd_visits_per_day
##   <chr>              <int>               <dbl>             <dbl>
## 1 K1                    64                27.5              12.2
## 2 K2                    75                21.3              10.3
## # ℹ 2 more variables: min_visits_per_day <int>, max_visits_per_day <int>

write_csv(visits_summary, "visits_per_day_summary_K1_K2_after3SD.csv")

# -----------------------------
# Plot (same treatment colors)
# -----------------------------
p <- ggplot(visits_day, aes(x = visit_date, y = n_visits, color = grazing_treatment)) +
  geom_line(linewidth = 0.9, alpha = 0.9) +
  geom_point(size = 1.5, alpha = 0.8) +
  scale_color_manual(values = c("K1" = "forestgreen", "K2" = "steelblue")) +
  labs(
    title = "Number of Visits per Day by Treatment (After 3SD)",
    x = "Date",
    y = "Visits per Day",
    color = "Treatment"
  ) +
  theme_minimal(base_size = 12)

print(p)

ggsave("visits_per_day_K1_K2_after3SD.png", p, width = 10, height = 5.5, dpi = 320)

Descriptive Statistics (Retained Observations)

Descriptive statistics were calculated from 3SD_qualified.xlsx and raw_data_for_diagnostics.csv.

raw_df <- read_csv("raw_data_for_diagnostics.csv", show_col_types = FALSE)

num_vars <- c("CH4_g_d", "CO2_g_d", "start_time", "end_time", "duration")

desc_tbl <- raw_df %>%
  summarise(across(all_of(num_vars), list(
    n = ~sum(is.finite(.x)),
    mean = ~mean(.x, na.rm = TRUE),
    sd = ~sd(.x, na.rm = TRUE),
    min = ~min(.x, na.rm = TRUE),
    q25 = ~quantile(.x, 0.25, na.rm = TRUE),
    median = ~median(.x, na.rm = TRUE),
    q75 = ~quantile(.x, 0.75, na.rm = TRUE),
    max = ~max(.x, na.rm = TRUE)
  )))

kable(t(desc_tbl), caption = "Descriptive statistics (raw retained data)")

Descriptive statistics (raw retained data)
CH4_g_d_n	3.360000e+03
CH4_g_d_mean	3.211619e+02
CH4_g_d_sd	9.000961e+01
CH4_g_d_min	3.371882e+01
CH4_g_d_q25	2.585283e+02
CH4_g_d_median	3.195012e+02
CH4_g_d_q75	3.799020e+02
CH4_g_d_max	6.104030e+02
CO2_g_d_n	3.360000e+03
CO2_g_d_mean	1.058192e+04
CO2_g_d_sd	1.584730e+03
CO2_g_d_min	5.898440e+03
CO2_g_d_q25	9.492579e+03
CO2_g_d_median	1.055799e+04
CO2_g_d_q75	1.164660e+04
CO2_g_d_max	1.545856e+04
start_time_n	3.360000e+03
start_time_mean	4.587438e+04
start_time_sd	2.099552e+01
start_time_min	4.583562e+04
start_time_q25	4.585385e+04
start_time_median	4.587869e+04
start_time_q75	4.589175e+04
start_time_max	4.590933e+04
end_time_n	3.360000e+03
end_time_mean	4.587438e+04
end_time_sd	2.099552e+01
end_time_min	4.583562e+04
end_time_q25	4.585386e+04
end_time_median	4.587869e+04
end_time_q75	4.589175e+04
end_time_max	4.590933e+04
duration_n	3.360000e+03
duration_mean	2.915100e-03
duration_sd	8.551000e-04
duration_min	1.400500e-03
duration_q25	2.500000e-03
duration_median	2.881900e-03
duration_q75	3.217600e-03
duration_max	1.745370e-02

Normality Diagnostics

Shapiro-Wilk by Treatment (K1 vs K2)

shapiro_trt <- read_csv("normality_by_treatment_shapiro.csv", show_col_types = FALSE)
shapiro_trt <- shapiro_trt %>%
  mutate(normality_at_0_05 = ifelse(p_value > 0.05, "Normal", "Non-normal"))

kable(shapiro_trt, digits = 6, caption = "Shapiro-Wilk normality by treatment")

Shapiro-Wilk normality by treatment
variable	treatment	n	W	p_value	normality_at_0_05
CH4_g_d	K1	1759	0.998723	0.225383	Normal
CH4_g_d	K2	1601	0.995382	0.000079	Non-normal
CO2_g_d	K1	1759	0.998496	0.121089	Normal
CO2_g_d	K2	1601	0.995022	0.000036	Non-normal

QQ Plots and Histograms

# Pre-diagnostics normality check
library(readr)

df <- read_csv("raw_data_for_diagnostics.csv", show_col_types = FALSE)
df$tr <- toupper(trimws(df$grazing_treatment))

cols <- c(K1 = "forestgreen", K2 = "steelblue")

plot_normality_by_trt <- function(trt, color) {
  x_ch4 <- as.numeric(df[df$tr == trt, ][["CH4_g_d"]])
  x_co2 <- as.numeric(df[df$tr == trt, ][["CO2_g_d"]])

  x_ch4 <- x_ch4[is.finite(x_ch4)]
  x_co2 <- x_co2[is.finite(x_co2)]

  old_par <- par(no.readonly = TRUE)
  on.exit(par(old_par))
  par(mfrow = c(2, 2), mar = c(4, 4, 2, 1))

  hist(x_ch4, breaks = "FD", col = color, border = "white",
       main = paste(trt, "CH4 Histogram"), xlab = "CH4_g_d")
  qqnorm(x_ch4, pch = 19, cex = 0.6, col = color,
         main = paste(trt, "CH4 QQ Plot"))
  qqline(x_ch4, col = "black", lwd = 2)

  hist(x_co2, breaks = "FD", col = color, border = "white",
       main = paste(trt, "CO2 Histogram"), xlab = "CO2_g_d")
  qqnorm(x_co2, pch = 19, cex = 0.6, col = color,
         main = paste(trt, "CO2 QQ Plot"))
  qqline(x_co2, col = "black", lwd = 2)
}

# View K1 only
plot_normality_by_trt("K1", cols["K1"])

# View K2 only
plot_normality_by_trt("K2", cols["K2"])

# LSMeans Calculation for CH4
library(readr)
library(dplyr)
library(lme4)
library(lmerTest)
library(emmeans)

# -----------------------------
# Load after-3SD enriched data
# -----------------------------
dat <- read_csv("analysis_3SD_enriched.csv", show_col_types = FALSE)

hms_to_minutes <- function(x) {
  x <- as.character(x)
  p <- strsplit(x, ":", fixed = TRUE)
  sapply(p, function(v) {
    if (length(v) != 3) return(NA_real_)
    as.numeric(v[1]) * 60 + as.numeric(v[2]) + as.numeric(v[3]) / 60
  })
}

excel_origin <- as.Date("1899-12-30")

dat2 <- dat %>%
  mutate(
    cow_id = factor(cow_id),
    grazing_treatment = factor(toupper(trimws(grazing_treatment))),
    start_time = as.numeric(start_time),
    CH4_g_d = as.numeric(CH4_g_d),
    CO2_g_d = as.numeric(CO2_g_d),
    hour_of_the_day = as.numeric(hour_of_the_day),
    date = factor(excel_origin + floor(start_time)),
    hour = factor(round(hour_of_the_day)),
    duration_min = hms_to_minutes(emission_duration)
  )

# -----------------------------
# Final LSMeans models (after 3SD)
# -----------------------------
d_ch4 <- dat2 %>%
  filter(is.finite(CH4_g_d), is.finite(duration_min), !is.na(date), !is.na(hour))

d_co2 <- dat2 %>%
  filter(is.finite(CO2_g_d), is.finite(duration_min), !is.na(date), !is.na(hour))

m_ch4 <- lmer(
  CH4_g_d ~ cow_id + duration_min + (1 | cow_id:date) + (1 | cow_id:hour),
  data = d_ch4,
  REML = TRUE
)

m_co2 <- lmer(
  CO2_g_d ~ cow_id + duration_min + (1 | cow_id:date) + (1 | cow_id:hour),
  data = d_co2,
  REML = TRUE
)

# -----------------------------
# Extract cow-level LSMeans
# -----------------------------
lsm_ch4 <- as.data.frame(summary(emmeans(m_ch4, ~ cow_id))) %>%
  rename(
    CH4_lsmean = emmean,
    CH4_SE = SE,
    CH4_LCL = asymp.LCL,
    CH4_UCL = asymp.UCL
  ) %>%
  select(cow_id, CH4_lsmean, CH4_SE, CH4_LCL, CH4_UCL)

lsm_co2 <- as.data.frame(summary(emmeans(m_co2, ~ cow_id))) %>%
  rename(
    CO2_lsmean = emmean,
    CO2_SE = SE,
    CO2_LCL = asymp.LCL,
    CO2_UCL = asymp.UCL
  ) %>%
  select(cow_id, CO2_lsmean, CO2_SE, CO2_LCL, CO2_UCL)

# Add treatment + visit counts
visits <- dat2 %>%
  group_by(cow_id, grazing_treatment) %>%
  summarise(n_visits = n(), .groups = "drop")

cow_lsmeans <- lsm_ch4 %>%
  inner_join(lsm_co2, by = "cow_id") %>%
  left_join(visits, by = "cow_id") %>%
  arrange(grazing_treatment, cow_id)

# Optional precision weights
cow_lsmeans <- cow_lsmeans %>%
  mutate(
    weight_ch4_inv_se2 = 1 / (CH4_SE^2),
    weight_co2_inv_se2 = 1 / (CO2_SE^2)
  )

# Save
write_csv(cow_lsmeans, "cow_lsmeans_CH4_CO2_3SD.csv")

# Preview
cat("Number of cows:", nrow(cow_lsmeans), "\n")

## Number of cows: 37

print(head(cow_lsmeans, 12))

##    cow_id CH4_lsmean    CH4_SE  CH4_LCL  CH4_UCL CO2_lsmean   CO2_SE   CO2_LCL
## 1    258K   248.9888  9.103959 231.1454 266.8323   8956.142 163.4676  8635.751
## 2    264F   290.6588  9.063870 272.8939 308.4236  10765.731 162.7390 10446.768
## 3    266H   345.1719  9.909498 325.7496 364.5942  11351.175 175.4951 11007.211
## 4    286J   290.2081  9.512178 271.5645 308.8516   9988.413 169.9603  9655.296
## 5    290G   355.6625 10.129619 335.8088 375.5162  12211.914 179.4756 11860.149
## 6    306K   220.2244 10.765321 199.1248 241.3241   8339.988 189.0924  7969.374
## 7    312E   325.7233  9.892952 306.3334 345.1131  10900.323 175.6940 10555.969
## 8    314J   317.1428  8.796818 299.9013 334.3842  10474.481 158.8130 10163.213
## 9    322C   324.7291 11.695099 301.8072 347.6511  10863.710 201.5420 10468.695
## 10   322F   319.6655  9.785017 300.4872 338.8437  10764.357 174.8119 10421.732
## 11   338D   224.3099 22.130105 180.9357 267.6841  10350.566 373.4940  9618.531
## 12   338J   316.7353  9.357583 298.3948 335.0759  10490.753 166.9916 10163.455
##      CO2_UCL grazing_treatment n_visits weight_ch4_inv_se2 weight_co2_inv_se2
## 1   9276.532                K1      121        0.012065335       3.742283e-05
## 2  11084.693                K1      122        0.012172300       3.775865e-05
## 3  11695.139                K1       96        0.010183491       3.246907e-05
## 4  10321.529                K1      109        0.011051980       3.461824e-05
## 5  12563.680                K1       92        0.009745716       3.104481e-05
## 6   8710.602                K1       79        0.008628714       2.796739e-05
## 7  11244.677                K1       98        0.010217583       3.239562e-05
## 8  10785.748                K1      132        0.012922567       3.964861e-05
## 9  11258.725                K1       61        0.007311259       2.461892e-05
## 10 11106.982                K1      104        0.010444239       3.272337e-05
## 11 11082.601                K1       16        0.002041893       7.168572e-06
## 12 10818.050                K1      111        0.011420172       3.586002e-05

#Plot result for LSMeans
library(readr)
library(dplyr)
library(ggplot2)

# --------------------------------
# Load LSMeans output
# --------------------------------
lsm <- read_csv("cow_lsmeans_CH4_CO2_3SD.csv", show_col_types = FALSE)

# --------------------------------
# Plot function (well-spaced)
# --------------------------------
plot_lsm_by_trt <- function(df, trt, y_col, lcl_col, ucl_col, fill_col, title_txt, ylab_txt) {
  d <- df %>%
    filter(grazing_treatment == trt) %>%
    arrange(desc(.data[[y_col]])) %>%
    mutate(
      cow_label = paste0(cow_id, " (n=", n_visits, ")"),
      cow_label = factor(cow_label, levels = rev(cow_label)),
      mean_lbl = sprintf("%.1f", .data[[y_col]])
    )

  ggplot(d, aes(x = cow_label, y = .data[[y_col]])) +
    geom_col(fill = fill_col, alpha = 0.9, width = 0.72) +
    geom_errorbar(aes(ymin = .data[[lcl_col]], ymax = .data[[ucl_col]]),
                  width = 0.20, color = "black", linewidth = 0.5) +
    geom_text(aes(y = .data[[ucl_col]] + 0.03 * diff(range(.data[[y_col]], na.rm = TRUE)),
                  label = mean_lbl),
              hjust = 0, size = 3.1) +
    coord_flip(clip = "off") +
    scale_y_continuous(expand = expansion(mult = c(0.02, 0.20))) +
    labs(
      title = title_txt,
      x = "Cow (visit count)",
      y = ylab_txt
    ) +
    theme_minimal(base_size = 12) +
    theme(
      panel.grid.major.y = element_blank(),
      plot.margin = margin(10, 40, 10, 10)
    )
}

# --------------------------------
# CH4 plots (same colors as before)
# --------------------------------
p_ch4_k1 <- plot_lsm_by_trt(
  lsm, "K1",
  y_col = "CH4_lsmean", lcl_col = "CH4_LCL", ucl_col = "CH4_UCL",
  fill_col = "forestgreen",
  title_txt = "K1 Cow-level Mean CH4 (LSMEANS, after 3SD)",
  ylab_txt = "CH4 LSMEAN (g/day)"
)

p_ch4_k2 <- plot_lsm_by_trt(
  lsm, "K2",
  y_col = "CH4_lsmean", lcl_col = "CH4_LCL", ucl_col = "CH4_UCL",
  fill_col = "steelblue",
  title_txt = "K2 Cow-level Mean CH4 (LSMEANS, after 3SD)",
  ylab_txt = "CH4 LSMEAN (g/day)"
)

# Show
p_ch4_k1

p_ch4_k2

# Save
ggsave("CH4_lsmeans_3SD_K1.png", p_ch4_k1, width = 8.5, height = 10.5, dpi = 320)
ggsave("CH4_lsmeans_3SD_K2.png", p_ch4_k2, width = 8.5, height = 10.5, dpi = 320)

# Calculating mean CH4 using the Aritmetic mean approach
library(readr)
library(dplyr)

# -----------------------------
# Load data
# -----------------------------
df <- read_csv("GreenFeed_Cleaned_K1K2.csv", show_col_types = FALSE)

# -----------------------------
# Clean + standardize
# -----------------------------
df2 <- df %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = toupper(trimws(grazing_treatment)),
    CH4_g_d = as.numeric(CH4_g_d)
  ) %>%
  filter(is.finite(CH4_g_d), grazing_treatment %in% c("K1", "K2"))

# -----------------------------
# Arithmetic mean CH4 per cow
# -----------------------------
cow_ch4_am <- df2 %>%
  group_by(cow_id, grazing_treatment) %>%
  summarise(
    n_visits = n(),
    CH4_mean_AM = mean(CH4_g_d, na.rm = TRUE),
    CH4_sd = sd(CH4_g_d, na.rm = TRUE),
    CH4_se = CH4_sd / sqrt(n_visits),
    CH4_lcl_95 = CH4_mean_AM - qt(0.975, df = pmax(n_visits - 1, 1)) * CH4_se,
    CH4_ucl_95 = CH4_mean_AM + qt(0.975, df = pmax(n_visits - 1, 1)) * CH4_se,
    .groups = "drop"
  ) %>%
  arrange(grazing_treatment, cow_id)

# -----------------------------
# Save and preview
# -----------------------------
write_csv(cow_ch4_am, "cow_CH4_arithmetic_mean.csv")

cat("Number of cows:", nrow(cow_ch4_am), "\n")

## Number of cows: 37

print(head(cow_ch4_am, 12))

## # A tibble: 12 × 8
##    cow_id grazing_treatment n_visits CH4_mean_AM CH4_sd CH4_se CH4_lcl_95
##    <chr>  <chr>                <int>       <dbl>  <dbl>  <dbl>      <dbl>
##  1 258K   K1                     122        249.   77.8   7.04       235.
##  2 264F   K1                     122        290.   69.3   6.27       277.
##  3 266H   K1                      97        351.   89.1   9.05       333.
##  4 286J   K1                     109        290.   88.0   8.43       273.
##  5 290G   K1                      93        361.   90.3   9.36       343.
##  6 306K   K1                      81        215.   62.0   6.89       202.
##  7 312E   K1                      98        330.   79.2   8.00       314.
##  8 314J   K1                     135        325.   83.5   7.19       311.
##  9 322C   K1                      61        326.  100.   12.8        300.
## 10 322F   K1                     106        325.  100.    9.75       305.
## 11 338D   K1                      16        229.   93.0  23.2        179.
## 12 338J   K1                     111        319.   69.3   6.58       306.
## # ℹ 1 more variable: CH4_ucl_95 <dbl>

# Comparing AM and LS Mean
library(readr)
library(dplyr)

# Load files
lsm <- read_csv("cow_lsmeans_CH4_CO2_37.csv", show_col_types = FALSE)
am  <- read_csv("cow_CH4_arithmetic_mean.csv", show_col_types = FALSE)

# Merge CH4 metrics
cmp <- lsm %>%
  select(cow_id, CH4_lsmean) %>%
  inner_join(
    am %>% select(cow_id, grazing_treatment, n_visits, CH4_mean_AM),
    by = "cow_id"
  ) %>%
  mutate(
    diff = CH4_lsmean - CH4_mean_AM,
    abs_diff = abs(diff),
    rank_lsm = rank(-CH4_lsmean, ties.method = "average"),
    rank_am = rank(-CH4_mean_AM, ties.method = "average"),
    rank_shift = rank_lsm - rank_am,
    abs_rank_shift = abs(rank_shift)
  )

# Correlations
pear <- cor.test(cmp$CH4_lsmean, cmp$CH4_mean_AM, method = "pearson")
spear <- cor.test(cmp$rank_lsm, cmp$rank_am, method = "spearman", exact = FALSE)

# Bland-Altman style summary
mean_diff <- mean(cmp$diff, na.rm = TRUE)
sd_diff <- sd(cmp$diff, na.rm = TRUE)
loa_low <- mean_diff - 1.96 * sd_diff
loa_high <- mean_diff + 1.96 * sd_diff

# Top-k overlap
k <- 10
top_lsm <- cmp %>% arrange(desc(CH4_lsmean)) %>% slice(1:k) %>% pull(cow_id)
top_am  <- cmp %>% arrange(desc(CH4_mean_AM)) %>% slice(1:k) %>% pull(cow_id)
top_overlap <- length(intersect(top_lsm, top_am))

# By-treatment averages
by_treatment <- cmp %>%
  group_by(grazing_treatment) %>%
  summarise(
    CH4_lsmean_mean = mean(CH4_lsmean, na.rm = TRUE),
    CH4_AM_mean = mean(CH4_mean_AM, na.rm = TRUE),
    mean_diff = mean(diff, na.rm = TRUE),
    .groups = "drop"
  )

# Rank shift table
rank_shift_tbl <- cmp %>%
  arrange(desc(abs_rank_shift), cow_id) %>%
  select(
    cow_id, grazing_treatment, n_visits,
    CH4_lsmean, CH4_mean_AM, diff,
    rank_lsm, rank_am, rank_shift, abs_rank_shift
  )

# Summary table
summary_tbl <- tibble::tibble(
  metric = c(
    "n_cows",
    "mean_CH4_lsmean",
    "mean_CH4_AM",
    "mean_diff_LSM_minus_AM",
    "median_diff_LSM_minus_AM",
    "min_diff",
    "max_diff",
    "mean_abs_diff",
    "loa_low_95",
    "loa_high_95",
    "pearson_r",
    "pearson_p",
    "spearman_rho_rank",
    "spearman_p_rank",
    "top10_overlap",
    "mean_abs_rank_shift",
    "max_abs_rank_shift"
  ),
  value = c(
    nrow(cmp),
    mean(cmp$CH4_lsmean, na.rm = TRUE),
    mean(cmp$CH4_mean_AM, na.rm = TRUE),
    mean_diff,
    median(cmp$diff, na.rm = TRUE),
    min(cmp$diff, na.rm = TRUE),
    max(cmp$diff, na.rm = TRUE),
    mean(cmp$abs_diff, na.rm = TRUE),
    loa_low,
    loa_high,
    unname(pear$estimate),
    pear$p.value,
    unname(spear$estimate),
    spear$p.value,
    top_overlap,
    mean(cmp$abs_rank_shift, na.rm = TRUE),
    max(cmp$abs_rank_shift, na.rm = TRUE)
  )
)

# Write outputs
write_csv(cmp, "CH4_LSM_vs_AM_comparison_by_cow.csv")
write_csv(rank_shift_tbl, "CH4_LSM_vs_AM_rank_shifts.csv")
write_csv(by_treatment, "CH4_LSM_vs_AM_by_treatment.csv")
write_csv(summary_tbl, "CH4_LSM_vs_AM_summary_metrics.csv")

# Print
print(summary_tbl, n = nrow(summary_tbl))

## # A tibble: 17 × 2
##    metric                       value
##    <chr>                        <dbl>
##  1 n_cows                    3.7 e+ 1
##  2 mean_CH4_lsmean           3.16e+ 2
##  3 mean_CH4_AM               3.18e+ 2
##  4 mean_diff_LSM_minus_AM   -2.09e+ 0
##  5 median_diff_LSM_minus_AM -1.93e+ 0
##  6 min_diff                 -9.81e+ 0
##  7 max_diff                  2.56e+ 0
##  8 mean_abs_diff             2.50e+ 0
##  9 loa_low_95               -7.14e+ 0
## 10 loa_high_95               2.95e+ 0
## 11 pearson_r                 9.98e- 1
## 12 pearson_p                 2.95e-43
## 13 spearman_rho_rank         9.94e- 1
## 14 spearman_p_rank           1.23e-35
## 15 top10_overlap             1   e+ 1
## 16 mean_abs_rank_shift       8.11e- 1
## 17 max_abs_rank_shift        3   e+ 0

print(by_treatment)

## # A tibble: 2 × 4
##   grazing_treatment CH4_lsmean_mean CH4_AM_mean mean_diff
##   <chr>                       <dbl>       <dbl>     <dbl>
## 1 K1                           300.        303.     -3.00
## 2 K2                           335.        336.     -1.02

print(head(rank_shift_tbl, 15))

## # A tibble: 15 × 10
##    cow_id grazing_treatment n_visits CH4_lsmean CH4_mean_AM    diff rank_lsm
##    <chr>  <chr>                <dbl>      <dbl>       <dbl>   <dbl>    <dbl>
##  1 442B   K2                      45       306.        304.  2.34         24
##  2 264F   K1                     122       290.        290.  0.740        29
##  3 322F   K1                     106       318.        325. -6.30         20
##  4 326G   K2                     100       323.        323. -0.0713       17
##  5 364B   K1                      78       338.        347. -8.95         10
##  6 400G   K2                      67       319.        318.  0.184        19
##  7 404E   K2                      91       330.        330.  0.470        12
##  8 252H   K2                     134       381.        382. -1.27          2
##  9 260K   K2                     127       306.        308. -1.49         25
## 10 286J   K1                     109       290.        290. -0.381        31
## 11 312E   K1                      98       326.        330. -3.55         14
## 12 312F   K2                     150       344.        347. -2.55          8
## 13 314J   K1                     135       322.        325. -2.71         18
## 14 332H   K2                     101       340.        344. -3.90          9
## 15 338J   K1                     111       316.        319. -2.58         21
## # ℹ 3 more variables: rank_am <dbl>, rank_shift <dbl>, abs_rank_shift <dbl>

#save visit summary per day:

library(readr)
library(dplyr)
library(openxlsx)

# Load after-3SD enriched data
dat <- read_csv("analysis_3SD_enriched.csv", show_col_types = FALSE) %>%
  mutate(
    grazing_treatment = toupper(trimws(grazing_treatment)),
    start_time = as.numeric(start_time),
    visit_date = as.Date("1899-12-30") + floor(start_time)
  ) %>%
  filter(grazing_treatment %in% c("K1", "K2"))

# Visits per day by treatment
visits_day <- dat %>%
  group_by(visit_date, grazing_treatment) %>%
  summarise(n_visits = n(), .groups = "drop") %>%
  arrange(visit_date, grazing_treatment)

# Summary table
visits_summary <- visits_day %>%
  group_by(grazing_treatment) %>%
  summarise(
    n_days = n(),
    mean_visits_per_day = mean(n_visits, na.rm = TRUE),
    sd_visits_per_day = sd(n_visits, na.rm = TRUE),
    min_visits_per_day = min(n_visits, na.rm = TRUE),
    max_visits_per_day = max(n_visits, na.rm = TRUE),
    .groups = "drop"
  )

# Optional wide table (date rows, K1/K2 columns)
visits_wide <- visits_day %>%
  tidyr::pivot_wider(
    names_from = grazing_treatment,
    values_from = n_visits,
    values_fill = 0
  ) %>%
  arrange(visit_date)

# Save to Excel (multiple sheets)
out_xlsx <- "Visits_Per_Day_K1_K2_After3SD.xlsx"
wb <- createWorkbook()

addWorksheet(wb, "Visits_By_Day_Long")
writeData(wb, "Visits_By_Day_Long", visits_day)

addWorksheet(wb, "Visits_By_Day_Wide")
writeData(wb, "Visits_By_Day_Wide", visits_wide)

addWorksheet(wb, "Summary")
writeData(wb, "Summary", visits_summary)

saveWorkbook(wb, out_xlsx, overwrite = TRUE)

cat("Saved:", out_xlsx, "\n")

## Saved: Visits_Per_Day_K1_K2_After3SD.xlsx

print(visits_summary)

## # A tibble: 2 × 6
##   grazing_treatment n_days mean_visits_per_day sd_visits_per_day
##   <chr>              <int>               <dbl>             <dbl>
## 1 K1                    64                27.5              12.2
## 2 K2                    75                21.3              10.3
## # ℹ 2 more variables: min_visits_per_day <int>, max_visits_per_day <int>

# Create 14 days period
library(readr)
library(dplyr)
library(writexl)

# Load after-3SD dataset
dat <- read_csv("analysis_3SD_enriched.csv", show_col_types = FALSE) %>%
  mutate(
    start_time = as.numeric(start_time),
    visit_date = as.Date("1899-12-30") + floor(start_time),
    grazing_treatment = toupper(trimws(grazing_treatment))
  )

# Build ordered day index
day_tbl <- dat %>%
  distinct(visit_date) %>%
  arrange(visit_date) %>%
  mutate(day_index = row_number())

# Parameters (your chosen rule)
drop_first_days <- 3
drop_last_days  <- 2
period_len <- 14

n_days_total <- nrow(day_tbl)

# Keep middle block
day_tbl2 <- day_tbl %>%
  filter(
    day_index > drop_first_days,
    day_index <= (n_days_total - drop_last_days)
  ) %>%
  mutate(
    kept_day_index = row_number(),
    period_id = ((kept_day_index - 1) %/% period_len) + 1
  )

# Check full periods only (optional strict filter)
max_full_period <- floor(nrow(day_tbl2) / period_len)
day_tbl2 <- day_tbl2 %>% filter(period_id <= max_full_period)

# Join period labels back to observations
dat_period <- dat %>%
  inner_join(day_tbl2 %>% select(visit_date, period_id), by = "visit_date")

# QA tables
period_day_counts <- day_tbl2 %>%
  group_by(period_id) %>%
  summarise(
    n_days = n(),
    start_date = min(visit_date),
    end_date = max(visit_date),
    .groups = "drop"
  )

period_visit_counts <- dat_period %>%
  group_by(period_id, grazing_treatment) %>%
  summarise(
    n_visits = n(),
    n_cows = n_distinct(cow_id),
    .groups = "drop"
  ) %>%
  arrange(period_id, grazing_treatment)

cat("Total days:", n_days_total, "\n")

## Total days: 75

cat("Dropped first:", drop_first_days, " | Dropped last:", drop_last_days, "\n")

## Dropped first: 3  | Dropped last: 2

cat("Days retained:", nrow(day_tbl2), "\n")

## Days retained: 70

cat("Periods retained:", max(day_tbl2$period_id), "\n\n")

## Periods retained: 5

print(period_day_counts)

## # A tibble: 5 × 4
##   period_id n_days start_date end_date  
##       <dbl>  <int> <date>     <date>    
## 1         1     14 2025-06-30 2025-07-13
## 2         2     14 2025-07-14 2025-07-27
## 3         3     14 2025-07-28 2025-08-10
## 4         4     14 2025-08-11 2025-08-24
## 5         5     14 2025-08-25 2025-09-07

print(period_visit_counts)

## # A tibble: 10 × 4
##    period_id grazing_treatment n_visits n_cows
##        <dbl> <chr>                <int>  <int>
##  1         1 K1                     381     18
##  2         1 K2                     331     14
##  3         2 K1                     177     18
##  4         2 K2                     235     15
##  5         3 K1                     267     18
##  6         3 K2                     331     16
##  7         4 K1                     528     19
##  8         4 K2                     323     17
##  9         5 K1                     382     20
## 10         5 K2                     297     17

# Save outputs
write_csv(dat_period, "analysis_3SD_with_14day_periods.csv")
write_csv(period_day_counts, "period_day_counts_14day.csv")
write_csv(period_visit_counts, "period_visit_counts_14day.csv")

write_xlsx(
  list(
    data_with_periods = dat_period,
    period_day_counts = period_day_counts,
    period_visit_counts = period_visit_counts
  ),
  "analysis_3SD_with_14day_periods.xlsx"
)

# Create 4-hour time bins

library(readr)
library(dplyr)

# Load period-tagged dataset (from previous step)
dat <- read_csv("analysis_3SD_with_14day_periods.csv", show_col_types = FALSE)

# Create 6 time bins of 4 hours each using hour_of_the_day
dat_bins <- dat %>%
  mutate(
    hour_of_the_day = as.numeric(hour_of_the_day),
    hour_int = floor(hour_of_the_day),
    time_bin_4hr = case_when(
      hour_int >= 0  & hour_int < 4  ~ "00:00-04:00",
      hour_int >= 4  & hour_int < 8  ~ "04:00-08:00",
      hour_int >= 8  & hour_int < 12 ~ "08:00-12:00",
      hour_int >= 12 & hour_int < 16 ~ "12:00-16:00",
      hour_int >= 16 & hour_int < 20 ~ "16:00-20:00",
      hour_int >= 20 & hour_int <= 23 ~ "20:00-24:00",
      TRUE ~ NA_character_
    ),
    time_bin_4hr = factor(
      time_bin_4hr,
      levels = c(
        "00:00-04:00", "04:00-08:00", "08:00-12:00",
        "12:00-16:00", "16:00-20:00", "20:00-24:00"
      )
    )
  )

# Quick checks
bin_counts <- dat_bins %>%
  count(time_bin_4hr, sort = FALSE)

bin_counts_by_trt <- dat_bins %>%
  mutate(grazing_treatment = toupper(trimws(grazing_treatment))) %>%
  count(grazing_treatment, time_bin_4hr, sort = FALSE)

print(bin_counts)

## # A tibble: 6 × 2
##   time_bin_4hr     n
##   <fct>        <int>
## 1 00:00-04:00     69
## 2 04:00-08:00    648
## 3 08:00-12:00    717
## 4 12:00-16:00    749
## 5 16:00-20:00    794
## 6 20:00-24:00    275

print(bin_counts_by_trt)

## # A tibble: 12 × 3
##    grazing_treatment time_bin_4hr     n
##    <chr>             <fct>        <int>
##  1 K1                00:00-04:00     13
##  2 K1                04:00-08:00    425
##  3 K1                08:00-12:00    309
##  4 K1                12:00-16:00    379
##  5 K1                16:00-20:00    470
##  6 K1                20:00-24:00    139
##  7 K2                00:00-04:00     56
##  8 K2                04:00-08:00    223
##  9 K2                08:00-12:00    408
## 10 K2                12:00-16:00    370
## 11 K2                16:00-20:00    324
## 12 K2                20:00-24:00    136

# Save
write_csv(dat_bins, "analysis_3SD_with_14day_periods_4hr_bins.csv")
write_csv(bin_counts, "time_bin_4hr_counts.csv")
write_csv(bin_counts_by_trt, "time_bin_4hr_counts_by_treatment.csv")

# Filter number of visits more than or equal to 20 and time bin is at least 5 of 6
library(readr)
library(dplyr)
library(writexl)

# Load data with periods + 4hr bins
dat <- read_csv("analysis_3SD_with_14day_periods_4hr_bins.csv", show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = toupper(trimws(grazing_treatment)),
    period_id = as.integer(period_id),
    time_bin_4hr = as.character(time_bin_4hr)
  )

# Criteria:
# 1) >= 20 visits per cow per period
# 2) >= 5 of 6 time bins represented per cow per period
cow_period_stats <- dat %>%
  filter(!is.na(period_id), !is.na(time_bin_4hr)) %>%
  group_by(cow_id, grazing_treatment, period_id) %>%
  summarise(
    n_visits = n(),
    n_bins_covered = n_distinct(time_bin_4hr),
    bins_covered = paste(sort(unique(time_bin_4hr)), collapse = "; "),
    .groups = "drop"
  ) %>%
  mutate(
    pass_visits = n_visits >= 20,
    pass_bins = n_bins_covered >= 5,
    pass_all = pass_visits & pass_bins,
    fail_reason = case_when(
      !pass_visits & !pass_bins ~ "Fail visits + bins",
      !pass_visits & pass_bins  ~ "Fail visits only",
      pass_visits & !pass_bins  ~ "Fail bins only",
      TRUE ~ "Pass"
    )
  )

# Failed combinations
failed_cow_periods <- cow_period_stats %>%
  filter(!pass_all) %>%
  arrange(period_id, grazing_treatment, cow_id)

# Passed combinations
passed_cow_periods <- cow_period_stats %>%
  filter(pass_all) %>%
  arrange(period_id, grazing_treatment, cow_id)

# Keep only observations from passed cow-periods
dat_passed <- dat %>%
  inner_join(
    passed_cow_periods %>% select(cow_id, grazing_treatment, period_id),
    by = c("cow_id", "grazing_treatment", "period_id")
  )

# Summary by period/treatment
summary_tbl <- cow_period_stats %>%
  group_by(period_id, grazing_treatment) %>%
  summarise(
    n_cow_period = n(),
    n_pass = sum(pass_all),
    n_fail = sum(!pass_all),
    n_fail_visits = sum(!pass_visits),
    n_fail_bins = sum(!pass_bins),
    .groups = "drop"
  ) %>%
  arrange(period_id, grazing_treatment)

# Print key results
cat("Total cow-period combinations:", nrow(cow_period_stats), "\n")

## Total cow-period combinations: 172

cat("Passed:", nrow(passed_cow_periods), "\n")

## Passed: 69

cat("Failed:", nrow(failed_cow_periods), "\n\n")

## Failed: 103

print(summary_tbl, n = nrow(summary_tbl))

## # A tibble: 10 × 7
##    period_id grazing_treatment n_cow_period n_pass n_fail n_fail_visits
##        <int> <chr>                    <int>  <int>  <int>         <int>
##  1         1 K1                          18     10      8             8
##  2         1 K2                          14      9      5             5
##  3         2 K1                          18      1     17            17
##  4         2 K2                          15      3     12            12
##  5         3 K1                          18      2     16            16
##  6         3 K2                          16      9      7             6
##  7         4 K1                          19     16      3             2
##  8         4 K2                          17      6     11             8
##  9         5 K1                          20      7     13             9
## 10         5 K2                          17      6     11            11
## # ℹ 1 more variable: n_fail_bins <int>

print(failed_cow_periods, n = nrow(failed_cow_periods))

## # A tibble: 103 × 10
##     cow_id grazing_treatment period_id n_visits n_bins_covered bins_covered     
##     <chr>  <chr>                 <int>    <int>          <int> <chr>            
##   1 306K   K1                        1       18              4 04:00-08:00; 12:…
##   2 322C   K1                        1       13              5 04:00-08:00; 08:…
##   3 364B   K1                        1       19              5 04:00-08:00; 08:…
##   4 370F   K1                        1       18              5 04:00-08:00; 08:…
##   5 410C   K1                        1       15              5 04:00-08:00; 08:…
##   6 426D   K1                        1       18              5 04:00-08:00; 08:…
##   7 436E   K1                        1       17              5 04:00-08:00; 08:…
##   8 454C   K1                        1       14              5 04:00-08:00; 08:…
##   9 266D   K2                        1       17              4 04:00-08:00; 08:…
##  10 400G   K2                        1        8              5 00:00-04:00; 04:…
##  11 404E   K2                        1        9              5 00:00-04:00; 04:…
##  12 442B   K2                        1        5              3 08:00-12:00; 12:…
##  13 446K   K2                        1       19              5 00:00-04:00; 04:…
##  14 264F   K1                        2       14              4 04:00-08:00; 12:…
##  15 266H   K1                        2        7              4 04:00-08:00; 08:…
##  16 286J   K1                        2        8              5 04:00-08:00; 08:…
##  17 290G   K1                        2        5              4 04:00-08:00; 08:…
##  18 306K   K1                        2       10              4 04:00-08:00; 12:…
##  19 312E   K1                        2        9              4 04:00-08:00; 08:…
##  20 314J   K1                        2       17              4 04:00-08:00; 12:…
##  21 322C   K1                        2        2              1 12:00-16:00      
##  22 322F   K1                        2       10              5 04:00-08:00; 08:…
##  23 338J   K1                        2       10              5 04:00-08:00; 08:…
##  24 364B   K1                        2        3              2 12:00-16:00; 16:…
##  25 370F   K1                        2        8              4 04:00-08:00; 08:…
##  26 370H   K1                        2       15              5 04:00-08:00; 08:…
##  27 410C   K1                        2       11              6 00:00-04:00; 04:…
##  28 426D   K1                        2        8              5 00:00-04:00; 04:…
##  29 436E   K1                        2        9              5 04:00-08:00; 08:…
##  30 454C   K1                        2        9              4 08:00-12:00; 12:…
##  31 252H   K2                        2       18              6 00:00-04:00; 04:…
##  32 260K   K2                        2       19              4 04:00-08:00; 08:…
##  33 266D   K2                        2       15              5 04:00-08:00; 08:…
##  34 270G   K2                        2       18              6 00:00-04:00; 04:…
##  35 296G   K2                        2       14              5 04:00-08:00; 08:…
##  36 296H   K2                        2       17              5 04:00-08:00; 08:…
##  37 326G   K2                        2       15              5 04:00-08:00; 08:…
##  38 332H   K2                        2       12              5 04:00-08:00; 08:…
##  39 368J   K2                        2       15              5 00:00-04:00; 04:…
##  40 400G   K2                        2        7              3 12:00-16:00; 16:…
##  41 442B   K2                        2        6              4 08:00-12:00; 12:…
##  42 446K   K2                        2       12              4 00:00-04:00; 04:…
##  43 258K   K1                        3       19              5 04:00-08:00; 08:…
##  44 266H   K1                        3       16              5 04:00-08:00; 08:…
##  45 286J   K1                        3       19              5 04:00-08:00; 08:…
##  46 290G   K1                        3       14              5 04:00-08:00; 08:…
##  47 306K   K1                        3       13              3 04:00-08:00; 12:…
##  48 312E   K1                        3       15              4 04:00-08:00; 08:…
##  49 314J   K1                        3       18              5 04:00-08:00; 08:…
##  50 322C   K1                        3        8              5 04:00-08:00; 08:…
##  51 322F   K1                        3       18              5 04:00-08:00; 08:…
##  52 338J   K1                        3       14              4 04:00-08:00; 08:…
##  53 364B   K1                        3        8              4 04:00-08:00; 08:…
##  54 370F   K1                        3       16              5 04:00-08:00; 08:…
##  55 410C   K1                        3       12              4 04:00-08:00; 08:…
##  56 426D   K1                        3       13              4 04:00-08:00; 08:…
##  57 436E   K1                        3       11              5 04:00-08:00; 08:…
##  58 454C   K1                        3       12              5 04:00-08:00; 08:…
##  59 260C   K2                        3       17              4 04:00-08:00; 08:…
##  60 266D   K2                        3       14              5 04:00-08:00; 08:…
##  61 332H   K2                        3       17              5 04:00-08:00; 08:…
##  62 400G   K2                        3       16              5 00:00-04:00; 04:…
##  63 404E   K2                        3       21              4 04:00-08:00; 08:…
##  64 442B   K2                        3       11              5 04:00-08:00; 08:…
##  65 446K   K2                        3       17              5 04:00-08:00; 08:…
##  66 306K   K1                        4       20              4 04:00-08:00; 08:…
##  67 338D   K1                        4        7              3 08:00-12:00; 12:…
##  68 410C   K1                        4       19              5 04:00-08:00; 08:…
##  69 266D   K2                        4       17              4 04:00-08:00; 08:…
##  70 296G   K2                        4       18              5 04:00-08:00; 08:…
##  71 296H   K2                        4       19              4 04:00-08:00; 08:…
##  72 326G   K2                        4       20              4 04:00-08:00; 08:…
##  73 332H   K2                        4       18              5 04:00-08:00; 08:…
##  74 368J   K2                        4       25              4 04:00-08:00; 08:…
##  75 398G   K2                        4       19              4 04:00-08:00; 08:…
##  76 400G   K2                        4       17              5 04:00-08:00; 08:…
##  77 422J   K2                        4        2              2 12:00-16:00; 16:…
##  78 442B   K2                        4       15              4 04:00-08:00; 08:…
##  79 446K   K2                        4       23              3 08:00-12:00; 12:…
##  80 264F   K1                        5       24              4 04:00-08:00; 08:…
##  81 266H   K1                        5       22              4 04:00-08:00; 08:…
##  82 306K   K1                        5       17              4 04:00-08:00; 08:…
##  83 314J   K1                        5       29              4 04:00-08:00; 08:…
##  84 322C   K1                        5       17              5 04:00-08:00; 08:…
##  85 322F   K1                        5       14              5 04:00-08:00; 08:…
##  86 338D   K1                        5        9              4 08:00-12:00; 12:…
##  87 364B   K1                        5       16              5 00:00-04:00; 04:…
##  88 370H   K1                        5       11              5 04:00-08:00; 08:…
##  89 374E   K1                        5       16              5 04:00-08:00; 08:…
##  90 426D   K1                        5       16              5 04:00-08:00; 08:…
##  91 436E   K1                        5       20              4 04:00-08:00; 08:…
##  92 454C   K1                        5       12              4 08:00-12:00; 12:…
##  93 266D   K2                        5       13              4 04:00-08:00; 08:…
##  94 296H   K2                        5       17              5 04:00-08:00; 08:…
##  95 326G   K2                        5       18              5 04:00-08:00; 08:…
##  96 332H   K2                        5       18              5 04:00-08:00; 08:…
##  97 368J   K2                        5       17              5 04:00-08:00; 08:…
##  98 398G   K2                        5       13              4 04:00-08:00; 08:…
##  99 400G   K2                        5       17              4 04:00-08:00; 08:…
## 100 404E   K2                        5       17              3 08:00-12:00; 12:…
## 101 422J   K2                        5       12              3 08:00-12:00; 12:…
## 102 442B   K2                        5        7              4 04:00-08:00; 08:…
## 103 446K   K2                        5       17              4 08:00-12:00; 12:…
## # ℹ 4 more variables: pass_visits <lgl>, pass_bins <lgl>, pass_all <lgl>,
## #   fail_reason <chr>

# Save CSV outputs
write_csv(cow_period_stats, "cow_period_criteria_stats_14day.csv")
write_csv(failed_cow_periods, "failed_cow_periods_14day.csv")
write_csv(passed_cow_periods, "passed_cow_periods_14day.csv")
write_csv(dat_passed, "analysis_3SD_14day_passed_only.csv")
write_csv(summary_tbl, "cow_period_criteria_summary_14day.csv")

# Save Excel workbook
write_xlsx(
  list(
    cow_period_stats = cow_period_stats,
    failed_cow_periods = failed_cow_periods,
    passed_cow_periods = passed_cow_periods,
    summary = summary_tbl,
    data_passed_only = dat_passed
  ),
  "TOD_14day_filtering_results.xlsx"
)

#Cow loss to filtering
library(readr)
library(dplyr)

# Files
before <- read_csv("analysis_3SD_with_14day_periods_4hr_bins.csv", show_col_types = FALSE)
after  <- read_csv("analysis_3SD_14day_passed_only.csv", show_col_types = FALSE)
cp     <- read_csv("cow_period_criteria_stats_14day.csv", show_col_types = FALSE)

# Counts
obs_before <- nrow(before)
obs_after  <- nrow(after)

cows_before <- n_distinct(before$cow_id)
cows_after  <- n_distinct(after$cow_id)

cowperiod_before <- nrow(cp)
cowperiod_after  <- sum(cp$pass_all, na.rm = TRUE)

# Percent loss/retained
summary_loss <- tibble(
  group = c("Observations", "Cows", "Cow-periods"),
  n_before = c(obs_before, cows_before, cowperiod_before),
  n_after  = c(obs_after, cows_after, cowperiod_after)
) %>%
  mutate(
    n_lost = n_before - n_after,
    pct_lost = 100 * n_lost / n_before,
    pct_retained = 100 * n_after / n_before
  )

print(summary_loss)

## # A tibble: 3 × 6
##   group        n_before n_after n_lost pct_lost pct_retained
##   <chr>           <int>   <int>  <int>    <dbl>        <dbl>
## 1 Observations     3252    1772   1480     45.5         54.5
## 2 Cows               37      29      8     21.6         78.4
## 3 Cow-periods       172      69    103     59.9         40.1

# Optional: save
write_csv(summary_loss, "loss_summary_20visits_5bins.csv")

# cow ids with <20 visits per 14-day period
library(readr)
library(dplyr)

# -----------------------------
# Load data
# -----------------------------
dat <- read_csv("analysis_3SD_with_14day_periods_4hr_bins.csv", show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = toupper(trimws(grazing_treatment)),
    period_id = as.integer(period_id)
  ) %>%
  filter(!is.na(period_id))

# -----------------------------
# Visits per cow per period
# -----------------------------
cow_period_visits <- dat %>%
  group_by(period_id, grazing_treatment, cow_id) %>%
  summarise(n_visits = n(), .groups = "drop")

# Cows failing criterion: <20 visits
fail_lt20 <- cow_period_visits %>%
  filter(n_visits < 20) %>%
  arrange(period_id, grazing_treatment, cow_id)

# -----------------------------
# Counts
# -----------------------------
# By period
fail_lt20_counts <- fail_lt20 %>%
  group_by(period_id) %>%
  summarise(
    n_cows_lt20 = n_distinct(cow_id),
    n_cow_period_lt20 = n(),
    .groups = "drop"
  ) %>%
  arrange(period_id)

# By period x treatment
fail_lt20_counts_trt <- fail_lt20 %>%
  group_by(period_id, grazing_treatment) %>%
  summarise(
    n_cows_lt20 = n_distinct(cow_id),
    n_cow_period_lt20 = n(),
    .groups = "drop"
  ) %>%
  arrange(period_id, grazing_treatment)

# -----------------------------
# Print
# -----------------------------
cat("=== Cows with <20 visits by period ===\n")

## === Cows with <20 visits by period ===

if (nrow(fail_lt20_counts) == 0) {
  cat("No cows had <20 visits in any 14-day period.\n")
} else {
  print(fail_lt20_counts, n = nrow(fail_lt20_counts))
}

## # A tibble: 5 × 3
##   period_id n_cows_lt20 n_cow_period_lt20
##       <int>       <int>             <int>
## 1         1          13                13
## 2         2          29                29
## 3         3          22                22
## 4         4          10                10
## 5         5          20                20

cat("\n=== Cows with <20 visits by period x treatment ===\n")

## 
## === Cows with <20 visits by period x treatment ===

if (nrow(fail_lt20_counts_trt) == 0) {
  cat("No cows had <20 visits in any 14-day period x treatment.\n")
} else {
  print(fail_lt20_counts_trt, n = nrow(fail_lt20_counts_trt))
}

## # A tibble: 10 × 4
##    period_id grazing_treatment n_cows_lt20 n_cow_period_lt20
##        <int> <chr>                   <int>             <int>
##  1         1 K1                          8                 8
##  2         1 K2                          5                 5
##  3         2 K1                         17                17
##  4         2 K2                         12                12
##  5         3 K1                         16                16
##  6         3 K2                          6                 6
##  7         4 K1                          2                 2
##  8         4 K2                          8                 8
##  9         5 K1                          9                 9
## 10         5 K2                         11                11

cat("\n=== IDs failing <20 visits (cow-period level) ===\n")

## 
## === IDs failing <20 visits (cow-period level) ===

if (nrow(fail_lt20) == 0) {
  cat("No failing cow-period rows for <20 visits.\n")
} else {
  print(fail_lt20, n = nrow(fail_lt20))
}

## # A tibble: 94 × 4
##    period_id grazing_treatment cow_id n_visits
##        <int> <chr>             <chr>     <int>
##  1         1 K1                306K         18
##  2         1 K1                322C         13
##  3         1 K1                364B         19
##  4         1 K1                370F         18
##  5         1 K1                410C         15
##  6         1 K1                426D         18
##  7         1 K1                436E         17
##  8         1 K1                454C         14
##  9         1 K2                266D         17
## 10         1 K2                400G          8
## 11         1 K2                404E          9
## 12         1 K2                442B          5
## 13         1 K2                446K         19
## 14         2 K1                264F         14
## 15         2 K1                266H          7
## 16         2 K1                286J          8
## 17         2 K1                290G          5
## 18         2 K1                306K         10
## 19         2 K1                312E          9
## 20         2 K1                314J         17
## 21         2 K1                322C          2
## 22         2 K1                322F         10
## 23         2 K1                338J         10
## 24         2 K1                364B          3
## 25         2 K1                370F          8
## 26         2 K1                370H         15
## 27         2 K1                410C         11
## 28         2 K1                426D          8
## 29         2 K1                436E          9
## 30         2 K1                454C          9
## 31         2 K2                252H         18
## 32         2 K2                260K         19
## 33         2 K2                266D         15
## 34         2 K2                270G         18
## 35         2 K2                296G         14
## 36         2 K2                296H         17
## 37         2 K2                326G         15
## 38         2 K2                332H         12
## 39         2 K2                368J         15
## 40         2 K2                400G          7
## 41         2 K2                442B          6
## 42         2 K2                446K         12
## 43         3 K1                258K         19
## 44         3 K1                266H         16
## 45         3 K1                286J         19
## 46         3 K1                290G         14
## 47         3 K1                306K         13
## 48         3 K1                312E         15
## 49         3 K1                314J         18
## 50         3 K1                322C          8
## 51         3 K1                322F         18
## 52         3 K1                338J         14
## 53         3 K1                364B          8
## 54         3 K1                370F         16
## 55         3 K1                410C         12
## 56         3 K1                426D         13
## 57         3 K1                436E         11
## 58         3 K1                454C         12
## 59         3 K2                260C         17
## 60         3 K2                266D         14
## 61         3 K2                332H         17
## 62         3 K2                400G         16
## 63         3 K2                442B         11
## 64         3 K2                446K         17
## 65         4 K1                338D          7
## 66         4 K1                410C         19
## 67         4 K2                266D         17
## 68         4 K2                296G         18
## 69         4 K2                296H         19
## 70         4 K2                332H         18
## 71         4 K2                398G         19
## 72         4 K2                400G         17
## 73         4 K2                422J          2
## 74         4 K2                442B         15
## 75         5 K1                306K         17
## 76         5 K1                322C         17
## 77         5 K1                322F         14
## 78         5 K1                338D          9
## 79         5 K1                364B         16
## 80         5 K1                370H         11
## 81         5 K1                374E         16
## 82         5 K1                426D         16
## 83         5 K1                454C         12
## 84         5 K2                266D         13
## 85         5 K2                296H         17
## 86         5 K2                326G         18
## 87         5 K2                332H         18
## 88         5 K2                368J         17
## 89         5 K2                398G         13
## 90         5 K2                400G         17
## 91         5 K2                404E         17
## 92         5 K2                422J         12
## 93         5 K2                442B          7
## 94         5 K2                446K         17

# -----------------------------
# Save outputs
# -----------------------------
write_csv(cow_period_visits, "cow_period_visits_14day.csv")
write_csv(fail_lt20, "failed_lt20_visits_cow_period_14day.csv")
write_csv(fail_lt20_counts, "failed_lt20_visits_counts_by_period_14day.csv")
write_csv(fail_lt20_counts_trt, "failed_lt20_visits_counts_by_period_treatment_14day.csv")

# Check how many cows qualify for each period:

library(readr)
library(dplyr)
library(tidyr)
library(writexl)

# Load data with period + 4hr bins
dat <- read_csv("analysis_3SD_with_14day_periods_4hr_bins.csv", show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = toupper(trimws(grazing_treatment)),
    period_id = as.integer(period_id),
    time_bin_4hr = as.character(time_bin_4hr)
  ) %>%
  filter(!is.na(period_id), !is.na(time_bin_4hr), grazing_treatment %in% c("K1", "K2"))

# Per cow x period stats
cow_period_stats <- dat %>%
  group_by(period_id, grazing_treatment, cow_id) %>%
  summarise(
    n_visits = n(),
    n_bins_covered = n_distinct(time_bin_4hr),
    .groups = "drop"
  ) %>%
  mutate(
    pass_visits = n_visits >= 20,
    pass_bins = n_bins_covered >= 5,
    pass_all = pass_visits & pass_bins
  )

# Qualified cows per period x treatment
qualified_by_period_trt <- cow_period_stats %>%
  filter(pass_all) %>%
  group_by(period_id, grazing_treatment) %>%
  summarise(
    n_qualified_cows = n_distinct(cow_id),
    qualified_cow_ids = paste(sort(unique(cow_id)), collapse = ", "),
    .groups = "drop"
  ) %>%
  arrange(period_id, grazing_treatment)

# Also show totals evaluated per period x treatment
evaluated_by_period_trt <- cow_period_stats %>%
  group_by(period_id, grazing_treatment) %>%
  summarise(
    n_cows_evaluated = n_distinct(cow_id),
    .groups = "drop"
  )

summary_table <- evaluated_by_period_trt %>%
  left_join(qualified_by_period_trt, by = c("period_id", "grazing_treatment")) %>%
  mutate(
    n_qualified_cows = ifelse(is.na(n_qualified_cows), 0L, n_qualified_cows),
    qualified_cow_ids = ifelse(is.na(qualified_cow_ids), "", qualified_cow_ids),
    pct_qualified = round(100 * n_qualified_cows / n_cows_evaluated, 1)
  ) %>%
  arrange(period_id, grazing_treatment)

# Optional wide count table
summary_wide <- summary_table %>%
  select(period_id, grazing_treatment, n_qualified_cows) %>%
  pivot_wider(
    names_from = grazing_treatment,
    values_from = n_qualified_cows,
    values_fill = 0,
    names_prefix = "qualified_"
  ) %>%
  arrange(period_id)

cat("=== Qualified cows per period by treatment (5/6 bins + >=20 visits) ===\n")

## === Qualified cows per period by treatment (5/6 bins + >=20 visits) ===

print(summary_table, n = nrow(summary_table))

## # A tibble: 10 × 6
##    period_id grazing_treatment n_cows_evaluated n_qualified_cows
##        <int> <chr>                        <int>            <int>
##  1         1 K1                              18               10
##  2         1 K2                              14                9
##  3         2 K1                              18                1
##  4         2 K2                              15                3
##  5         3 K1                              18                2
##  6         3 K2                              16                9
##  7         4 K1                              19               16
##  8         4 K2                              17                6
##  9         5 K1                              20                7
## 10         5 K2                              17                6
## # ℹ 2 more variables: qualified_cow_ids <chr>, pct_qualified <dbl>

cat("\n=== Wide count table ===\n")

## 
## === Wide count table ===

print(summary_wide, n = nrow(summary_wide))

## # A tibble: 5 × 3
##   period_id qualified_K1 qualified_K2
##       <int>        <int>        <int>
## 1         1           10            9
## 2         2            1            3
## 3         3            2            9
## 4         4           16            6
## 5         5            7            6

# Save outputs
write_csv(cow_period_stats, "cow_period_stats_criteria_14day.csv")
write_csv(summary_table, "qualified_cows_per_period_treatment_14day.csv")
write_csv(summary_wide, "qualified_cows_per_period_treatment_14day_wide.csv")

write_xlsx(
  list(
    cow_period_stats = cow_period_stats,
    qualified_summary = summary_table,
    qualified_counts_wide = summary_wide
  ),
  "qualified_cows_per_period_treatment_14day.xlsx"
)

#Check why cows failed
library(readr)
library(dplyr)
library(tidyr)

dat <- read_csv("analysis_3SD_with_14day_periods_4hr_bins.csv", show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = toupper(trimws(grazing_treatment)),
    period_id = as.integer(period_id),
    time_bin_4hr = as.character(time_bin_4hr)
  ) %>%
  filter(!is.na(period_id), !is.na(time_bin_4hr), grazing_treatment %in% c("K1","K2"))

cow_period <- dat %>%
  group_by(period_id, grazing_treatment, cow_id) %>%
  summarise(
    n_visits = n(),
    n_bins = n_distinct(time_bin_4hr),
    .groups = "drop"
  )

# 1) Why cows fail under current rule (>=20 visits and >=5 bins)
fail_diag <- cow_period %>%
  mutate(
    fail_visits = n_visits < 20,
    fail_bins = n_bins < 5,
    fail_type = case_when(
      fail_visits & fail_bins ~ "both",
      fail_visits ~ "visits_only",
      fail_bins ~ "bins_only",
      TRUE ~ "pass"
    )
  ) %>%
  group_by(period_id, grazing_treatment, fail_type) %>%
  summarise(n_cows = n(), .groups = "drop") %>%
  arrange(period_id, grazing_treatment, fail_type)

print(fail_diag, n = nrow(fail_diag))

## # A tibble: 34 × 4
##    period_id grazing_treatment fail_type   n_cows
##        <int> <chr>             <chr>        <int>
##  1         1 K1                both             1
##  2         1 K1                pass            10
##  3         1 K1                visits_only      7
##  4         1 K2                both             2
##  5         1 K2                pass             9
##  6         1 K2                visits_only      3
##  7         2 K1                both            10
##  8         2 K1                pass             1
##  9         2 K1                visits_only      7
## 10         2 K2                both             4
## 11         2 K2                pass             3
## 12         2 K2                visits_only      8
## 13         3 K1                both             6
## 14         3 K1                pass             2
## 15         3 K1                visits_only     10
## 16         3 K2                bins_only        1
## 17         3 K2                both             1
## 18         3 K2                pass             9
## 19         3 K2                visits_only      5
## 20         4 K1                bins_only        1
## 21         4 K1                both             1
## 22         4 K1                pass            16
## 23         4 K1                visits_only      1
## 24         4 K2                bins_only        3
## 25         4 K2                both             5
## 26         4 K2                pass             6
## 27         4 K2                visits_only      3
## 28         5 K1                bins_only        4
## 29         5 K1                both             3
## 30         5 K1                pass             7
## 31         5 K1                visits_only      6
## 32         5 K2                both             7
## 33         5 K2                pass             6
## 34         5 K2                visits_only      4

write_csv(fail_diag, "failure_reason_by_period_treatment.csv")

# 2) Sensitivity grid to find workable rule
visit_thresholds <- c(20, 18, 15)
bin_thresholds <- c(5, 4)

sens <- tidyr::crossing(min_visits = visit_thresholds, min_bins = bin_thresholds) %>%
  rowwise() %>%
  do({
    mv <- .$min_visits
    mb <- .$min_bins
    out <- cow_period %>%
      mutate(pass = n_visits >= mv & n_bins >= mb) %>%
      group_by(period_id, grazing_treatment) %>%
      summarise(
        n_qualified = sum(pass),
        n_total = n(),
        pct_qualified = round(100 * n_qualified / n_total, 1),
        .groups = "drop"
      ) %>%
      mutate(min_visits = mv, min_bins = mb)
    out
  }) %>%
  ungroup() %>%
  select(min_visits, min_bins, period_id, grazing_treatment, n_qualified, n_total, pct_qualified) %>%
  arrange(min_visits, min_bins, period_id, grazing_treatment)

print(sens, n = nrow(sens))

## # A tibble: 60 × 7
##    min_visits min_bins period_id grazing_treatment n_qualified n_total
##         <dbl>    <dbl>     <int> <chr>                   <int>   <int>
##  1         15        4         1 K1                         16      18
##  2         15        4         1 K2                         11      14
##  3         15        4         2 K1                          3      18
##  4         15        4         2 K2                         10      15
##  5         15        4         3 K1                          9      18
##  6         15        4         3 K2                         14      16
##  7         15        4         4 K1                         18      19
##  8         15        4         4 K2                         15      17
##  9         15        4         5 K1                         16      20
## 10         15        4         5 K2                         12      17
## 11         15        5         1 K1                         15      18
## 12         15        5         1 K2                         10      14
## 13         15        5         2 K1                          2      18
## 14         15        5         2 K2                          9      15
## 15         15        5         3 K1                          8      18
## 16         15        5         3 K2                         12      16
## 17         15        5         4 K1                         17      19
## 18         15        5         4 K2                          9      17
## 19         15        5         5 K1                         11      20
## 20         15        5         5 K2                         10      17
## 21         18        4         1 K1                         14      18
## 22         18        4         1 K2                         10      14
## 23         18        4         2 K1                          1      18
## 24         18        4         2 K2                          6      15
## 25         18        4         3 K1                          6      18
## 26         18        4         3 K2                         10      16
## 27         18        4         4 K1                         18      19
## 28         18        4         4 K2                         12      17
## 29         18        4         5 K1                         11      20
## 30         18        4         5 K2                          8      17
## 31         18        5         1 K1                         13      18
## 32         18        5         1 K2                         10      14
## 33         18        5         2 K1                          1      18
## 34         18        5         2 K2                          5      15
## 35         18        5         3 K1                          6      18
## 36         18        5         3 K2                          9      16
## 37         18        5         4 K1                         17      19
## 38         18        5         4 K2                          8      17
## 39         18        5         5 K1                          7      20
## 40         18        5         5 K2                          8      17
## 41         20        4         1 K1                         10      18
## 42         20        4         1 K2                          9      14
## 43         20        4         2 K1                          1      18
## 44         20        4         2 K2                          3      15
## 45         20        4         3 K1                          2      18
## 46         20        4         3 K2                         10      16
## 47         20        4         4 K1                         17      19
## 48         20        4         4 K2                          8      17
## 49         20        4         5 K1                         11      20
## 50         20        4         5 K2                          6      17
## 51         20        5         1 K1                         10      18
## 52         20        5         1 K2                          9      14
## 53         20        5         2 K1                          1      18
## 54         20        5         2 K2                          3      15
## 55         20        5         3 K1                          2      18
## 56         20        5         3 K2                          9      16
## 57         20        5         4 K1                         16      19
## 58         20        5         4 K2                          6      17
## 59         20        5         5 K1                          7      20
## 60         20        5         5 K2                          6      17
## # ℹ 1 more variable: pct_qualified <dbl>

write_csv(sens, "criteria_sensitivity_14day.csv")

# Use less stringent filtering criteria and compare
library(readr)
library(dplyr)
library(lme4)
library(performance)

# -----------------------------
# Repeatability (ICC) on filtered cow-period data
# -----------------------------
dat <- read_csv("analysis_3SD_14day_passed_only.csv", show_col_types = FALSE) %>%
  mutate(
    cow_id = factor(cow_id),
    period_id = factor(period_id),
    CH4_g_d = as.numeric(CH4_g_d)
  ) %>%
  filter(is.finite(CH4_g_d), !is.na(cow_id), !is.na(period_id))

# Basic checks
cat("Raw filtered rows:", nrow(dat), "\n")

## Raw filtered rows: 1772

cat("Unique cows:", n_distinct(dat$cow_id), "\n")

## Unique cows: 29

cat("Unique periods:", n_distinct(dat$period_id), "\n\n")

## Unique periods: 5

# Mean CH4 per cow-period
cow_period <- dat %>%
  group_by(cow_id, period_id) %>%
  summarise(ch4_mean = mean(CH4_g_d, na.rm = TRUE), .groups = "drop")

cat("Cow-period rows:", nrow(cow_period), "\n")

## Cow-period rows: 69

cat("Cow-period counts by period:\n")

## Cow-period counts by period:

print(table(cow_period$period_id))

## 
##  1  2  3  4  5 
## 19  4 11 22 13

cat("\n")

# Optional: how many periods per cow (for stability context)
periods_per_cow <- cow_period %>%
  group_by(cow_id) %>%
  summarise(n_periods = n_distinct(period_id), .groups = "drop")

cat("Periods per cow summary:\n")

## Periods per cow summary:

print(summary(periods_per_cow$n_periods))

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   1.000   2.000   2.379   3.000   5.000

cat("\n")

# Random-intercept model for repeatability
m_rep <- lmer(ch4_mean ~ (1 | cow_id), data = cow_period, REML = TRUE)

# ICC (repeatability)
icc_out <- performance::icc(m_rep)
cat("=== Repeatability (ICC) ===\n")

## === Repeatability (ICC) ===

print(icc_out)

## # Intraclass Correlation Coefficient
## 
##     Adjusted ICC: 0.519
##   Unadjusted ICC: 0.519

cat("\n")

# Variance components
cat("=== Variance Components ===\n")

## === Variance Components ===

print(VarCorr(m_rep), comp = "Variance")

##  Groups   Name        Variance
##  cow_id   (Intercept) 1050.30 
##  Residual              973.28

cat("\n")

# Save outputs
# ICC object is list-like; coerce safely
icc_tbl <- as.data.frame(icc_out)
write_csv(icc_tbl, "repeatability_icc_CH4_14day_filtered.csv")

var_tbl <- as.data.frame(VarCorr(m_rep))
write_csv(var_tbl, "repeatability_variance_components_CH4_14day_filtered.csv")

write_csv(cow_period, "repeatability_input_cow_period_CH4_14day_filtered.csv")

library(readr)
library(dplyr)
library(tidyr)
library(lme4)
library(performance)

# ---------------------------------------------------------
# Compare ICC across filter rules:
# 1) >=20 visits & >=5 bins
# 2) >=18 visits & >=5 bins
# 3) >=15 visits & >=4 bins
# ---------------------------------------------------------

# Load data with period + 4hr bins
dat <- read_csv("analysis_3SD_with_14day_periods_4hr_bins.csv", show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = toupper(trimws(grazing_treatment)),
    period_id = as.integer(period_id),
    CH4_g_d = as.numeric(CH4_g_d),
    time_bin_4hr = as.character(time_bin_4hr)
  ) %>%
  filter(
    !is.na(period_id),
    !is.na(time_bin_4hr),
    is.finite(CH4_g_d),
    grazing_treatment %in% c("K1", "K2")
  )

# Candidate rules
rules <- tibble::tribble(
  ~rule_name, ~min_visits, ~min_bins,
  "20v_5bins", 20, 5,
  "18v_5bins", 18, 5,
  "15v_4bins", 15, 4
)

# Per cow-period stats once
cow_period_stats <- dat %>%
  group_by(cow_id, grazing_treatment, period_id) %>%
  summarise(
    n_visits = n(),
    n_bins = n_distinct(time_bin_4hr),
    ch4_mean = mean(CH4_g_d, na.rm = TRUE),
    .groups = "drop"
  )

run_rule_icc <- function(min_visits, min_bins, rule_name) {
  cp <- cow_period_stats %>%
    filter(n_visits >= min_visits, n_bins >= min_bins)

  # Basic retention stats
  n_cows <- n_distinct(cp$cow_id)
  n_rows <- nrow(cp)
  n_periods <- n_distinct(cp$period_id)

  period_counts <- cp %>%
    count(period_id, name = "n_cow_period_rows") %>%
    complete(period_id = sort(unique(cow_period_stats$period_id)),
             fill = list(n_cow_period_rows = 0)) %>%
    arrange(period_id)

  # Need enough data to fit ICC
  if (n_cows < 2 || n_rows < 3) {
    icc_adj <- NA_real_
    icc_unadj <- NA_real_
    var_cow <- NA_real_
    var_resid <- NA_real_
    fit_status <- "insufficient_data"
  } else {
    m <- lmer(ch4_mean ~ (1 | cow_id), data = cp, REML = TRUE)

    icc_obj <- performance::icc(m)
    icc_adj <- as.numeric(icc_obj$ICC_adjusted)
    icc_unadj <- as.numeric(icc_obj$ICC_unadjusted)

    vc <- as.data.frame(VarCorr(m))
    var_cow <- vc$vcov[vc$grp == "cow_id"]
    var_resid <- vc$vcov[vc$grp == "Residual"]
    fit_status <- "ok"
  }

  summary_row <- tibble(
    rule_name = rule_name,
    min_visits = min_visits,
    min_bins = min_bins,
    n_cows_retained = n_cows,
    n_cow_period_rows = n_rows,
    n_periods_retained = n_periods,
    ICC_adjusted = icc_adj,
    ICC_unadjusted = icc_unadj,
    var_between_cow = var_cow,
    var_within_resid = var_resid,
    fit_status = fit_status
  )

  period_row <- period_counts %>%
    mutate(
      rule_name = rule_name,
      min_visits = min_visits,
      min_bins = min_bins
    ) %>%
    select(rule_name, min_visits, min_bins, period_id, n_cow_period_rows)

  list(summary = summary_row, period = period_row, data = cp)
}

# Run all rules
res_list <- lapply(seq_len(nrow(rules)), function(i) {
  run_rule_icc(rules$min_visits[i], rules$min_bins[i], rules$rule_name[i])
})

icc_summary <- bind_rows(lapply(res_list, `[[`, "summary")) %>%
  arrange(desc(ICC_adjusted), desc(n_cows_retained), desc(n_cow_period_rows))

period_coverage <- bind_rows(lapply(res_list, `[[`, "period")) %>%
  arrange(rule_name, period_id)

# Print
cat("=== ICC comparison across rules ===\n")

## === ICC comparison across rules ===

print(icc_summary, n = nrow(icc_summary))

## # A tibble: 3 × 11
##   rule_name min_visits min_bins n_cows_retained n_cow_period_rows
##   <chr>          <dbl>    <dbl>           <int>             <int>
## 1 15v_4bins         15        4              35               124
## 2 20v_5bins         20        5              29                69
## 3 18v_5bins         18        5              30                84
## # ℹ 6 more variables: n_periods_retained <int>, ICC_adjusted <dbl>,
## #   ICC_unadjusted <dbl>, var_between_cow <dbl>, var_within_resid <dbl>,
## #   fit_status <chr>

cat("\n=== Period coverage (cow-period rows) by rule ===\n")

## 
## === Period coverage (cow-period rows) by rule ===

print(period_coverage, n = nrow(period_coverage))

## # A tibble: 15 × 5
##    rule_name min_visits min_bins period_id n_cow_period_rows
##    <chr>          <dbl>    <dbl>     <int>             <int>
##  1 15v_4bins         15        4         1                27
##  2 15v_4bins         15        4         2                13
##  3 15v_4bins         15        4         3                23
##  4 15v_4bins         15        4         4                33
##  5 15v_4bins         15        4         5                28
##  6 18v_5bins         18        5         1                23
##  7 18v_5bins         18        5         2                 6
##  8 18v_5bins         18        5         3                15
##  9 18v_5bins         18        5         4                25
## 10 18v_5bins         18        5         5                15
## 11 20v_5bins         20        5         1                19
## 12 20v_5bins         20        5         2                 4
## 13 20v_5bins         20        5         3                11
## 14 20v_5bins         20        5         4                22
## 15 20v_5bins         20        5         5                13

# Save outputs
write_csv(icc_summary, "ICC_comparison_by_filter_rule.csv")
write_csv(period_coverage, "ICC_period_coverage_by_filter_rule.csv")

# Also save each retained cow-period dataset
for (obj in res_list) {
  nm <- obj$summary$rule_name[1]
  write_csv(obj$data, paste0("cow_period_retained_", nm, ".csv"))
}

# 15 visits, 4 time bin selected. Check cows that failed

library(readr)
library(dplyr)
library(writexl)

# Chosen rule
min_visits <- 15
min_bins <- 4

# Load data
dat <- read_csv("analysis_3SD_with_14day_periods_4hr_bins.csv", show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = toupper(trimws(grazing_treatment)),
    period_id = as.integer(period_id),
    time_bin_4hr = as.character(time_bin_4hr)
  ) %>%
  filter(!is.na(period_id), !is.na(time_bin_4hr), grazing_treatment %in% c("K1", "K2"))

# Cow-period stats
cow_period_stats <- dat %>%
  group_by(cow_id, grazing_treatment, period_id) %>%
  summarise(
    n_visits = n(),
    n_bins_covered = n_distinct(time_bin_4hr),
    bins_covered = paste(sort(unique(time_bin_4hr)), collapse = "; "),
    .groups = "drop"
  ) %>%
  mutate(
    pass_visits = n_visits >= min_visits,
    pass_bins = n_bins_covered >= min_bins,
    pass_all = pass_visits & pass_bins,
    fail_reason = case_when(
      !pass_visits & !pass_bins ~ "Fail visits + bins",
      !pass_visits & pass_bins ~ "Fail visits only",
      pass_visits & !pass_bins ~ "Fail bins only",
      TRUE ~ "Pass"
    )
  )

# Passed and failed cow-period rows
passed_cow_periods <- cow_period_stats %>%
  filter(pass_all) %>%
  arrange(period_id, grazing_treatment, cow_id)

failed_cow_periods <- cow_period_stats %>%
  filter(!pass_all) %>%
  arrange(period_id, grazing_treatment, cow_id)

# IDs retained vs removed at cow level
all_ids <- sort(unique(cow_period_stats$cow_id))
retained_ids <- sort(unique(passed_cow_periods$cow_id))
removed_ids <- setdiff(all_ids, retained_ids)

retained_id_table <- tibble(cow_id = retained_ids) %>%
  left_join(
    dat %>% distinct(cow_id, grazing_treatment),
    by = "cow_id"
  ) %>%
  arrange(grazing_treatment, cow_id)

removed_id_table <- tibble(cow_id = removed_ids) %>%
  left_join(
    dat %>% distinct(cow_id, grazing_treatment),
    by = "cow_id"
  ) %>%
  arrange(grazing_treatment, cow_id)

# Period-level retained counts
retained_counts_period <- passed_cow_periods %>%
  count(period_id, grazing_treatment, name = "n_retained_cow_period") %>%
  arrange(period_id, grazing_treatment)

# Final filtered observation-level dataset
dat_passed <- dat %>%
  inner_join(
    passed_cow_periods %>% select(cow_id, grazing_treatment, period_id),
    by = c("cow_id", "grazing_treatment", "period_id")
  )

# Print summary
cat("Rule used: >=", min_visits, " visits and >=", min_bins, " bins per cow-period\n\n")

## Rule used: >= 15  visits and >= 4  bins per cow-period

cat("Total unique cows:", length(all_ids), "\n")

## Total unique cows: 37

cat("Retained unique cows:", length(retained_ids), "\n")

## Retained unique cows: 35

cat("Removed unique cows:", length(removed_ids), "\n\n")

## Removed unique cows: 2

cat("Removed cow IDs:\n")

## Removed cow IDs:

if (nrow(removed_id_table) == 0) {
  cat("None\n")
} else {
  print(removed_id_table, n = nrow(removed_id_table))
}

## # A tibble: 2 × 2
##   cow_id grazing_treatment
##   <chr>  <chr>            
## 1 338D   K1               
## 2 422J   K2

cat("\nRetained cow-period counts by period x treatment:\n")

## 
## Retained cow-period counts by period x treatment:

print(retained_counts_period, n = nrow(retained_counts_period))

## # A tibble: 10 × 3
##    period_id grazing_treatment n_retained_cow_period
##        <int> <chr>                             <int>
##  1         1 K1                                   16
##  2         1 K2                                   11
##  3         2 K1                                    3
##  4         2 K2                                   10
##  5         3 K1                                    9
##  6         3 K2                                   14
##  7         4 K1                                   18
##  8         4 K2                                   15
##  9         5 K1                                   16
## 10         5 K2                                   12

# Save CSVs
write_csv(cow_period_stats, "cow_period_stats_15v_4bins.csv")
write_csv(passed_cow_periods, "passed_cow_periods_15v_4bins.csv")
write_csv(failed_cow_periods, "failed_cow_periods_15v_4bins.csv")
write_csv(retained_id_table, "retained_cow_ids_15v_4bins.csv")
write_csv(removed_id_table, "removed_cow_ids_15v_4bins.csv")
write_csv(retained_counts_period, "retained_counts_by_period_treatment_15v_4bins.csv")
write_csv(dat_passed, "analysis_3SD_14day_passed_only_15v_4bins.csv")

# Save Excel workbook
write_xlsx(
  list(
    cow_period_stats = cow_period_stats,
    passed_cow_periods = passed_cow_periods,
    failed_cow_periods = failed_cow_periods,
    retained_cow_ids = retained_id_table,
    removed_cow_ids = removed_id_table,
    retained_counts_period_treatment = retained_counts_period,
    data_passed_only = dat_passed
  ),
  "TOD_14day_filtering_15v_4bins_results.xlsx"
)

# Compute mean CH4 and CO2 per period:

library(readr)
library(dplyr)
library(writexl)

# Use the filtered dataset from chosen rule (15 visits + 4 bins)
dat <- read_csv("analysis_3SD_14day_passed_only_15v_4bins.csv", show_col_types = FALSE) %>%
  mutate(
    period_id = as.integer(period_id),
    grazing_treatment = toupper(trimws(grazing_treatment)),
    CH4_g_d = as.numeric(CH4_g_d),
    CO2_g_d = as.numeric(CO2_g_d)
  ) %>%
  filter(!is.na(period_id), grazing_treatment %in% c("K1", "K2"))

# ---------------------------------------
# Mean CH4 and CO2 by period x treatment
# ---------------------------------------
period_means <- dat %>%
  group_by(period_id, grazing_treatment) %>%
  summarise(
    n_obs = n(),
    n_cows = n_distinct(cow_id),

    CH4_mean = mean(CH4_g_d, na.rm = TRUE),
    CH4_sd = sd(CH4_g_d, na.rm = TRUE),
    CH4_se = CH4_sd / sqrt(n_obs),
    CH4_lcl_95 = CH4_mean - 1.96 * CH4_se,
    CH4_ucl_95 = CH4_mean + 1.96 * CH4_se,

    CO2_mean = mean(CO2_g_d, na.rm = TRUE),
    CO2_sd = sd(CO2_g_d, na.rm = TRUE),
    CO2_se = CO2_sd / sqrt(n_obs),
    CO2_lcl_95 = CO2_mean - 1.96 * CO2_se,
    CO2_ucl_95 = CO2_mean + 1.96 * CO2_se,

    .groups = "drop"
  ) %>%
  arrange(period_id, grazing_treatment)

# Optional: pooled across treatment (period only)
period_means_overall <- dat %>%
  group_by(period_id) %>%
  summarise(
    n_obs = n(),
    n_cows = n_distinct(cow_id),

    CH4_mean = mean(CH4_g_d, na.rm = TRUE),
    CH4_sd = sd(CH4_g_d, na.rm = TRUE),
    CH4_se = CH4_sd / sqrt(n_obs),
    CH4_lcl_95 = CH4_mean - 1.96 * CH4_se,
    CH4_ucl_95 = CH4_mean + 1.96 * CH4_se,

    CO2_mean = mean(CO2_g_d, na.rm = TRUE),
    CO2_sd = sd(CO2_g_d, na.rm = TRUE),
    CO2_se = CO2_sd / sqrt(n_obs),
    CO2_lcl_95 = CO2_mean - 1.96 * CO2_se,
    CO2_ucl_95 = CO2_mean + 1.96 * CO2_se,

    .groups = "drop"
  ) %>%
  arrange(period_id)

# Print
cat("=== Mean CH4 and CO2 by Period x Treatment (15v4bins) ===\n")

## === Mean CH4 and CO2 by Period x Treatment (15v4bins) ===

print(period_means, n = nrow(period_means))

## # A tibble: 10 × 14
##    period_id grazing_treatment n_obs n_cows CH4_mean CH4_sd CH4_se CH4_lcl_95
##        <int> <chr>             <int>  <int>    <dbl>  <dbl>  <dbl>      <dbl>
##  1         1 K1                  354     16     325.   90.2   4.79       316.
##  2         1 K2                  309     11     372.   85.8   4.88       363.
##  3         2 K1                   54      3     266.   78.0  10.6        245.
##  4         2 K2                  184     10     339.   94.4   6.96       325.
##  5         3 K1                  162      9     311.   87.4   6.86       297.
##  6         3 K2                  306     14     337.   90.3   5.16       327.
##  7         4 K1                  521     18     329.   76.0   3.33       323.
##  8         4 K2                  298     15     342.   87.1   5.05       332.
##  9         5 K1                  336     16     273.   88.9   4.85       264.
## 10         5 K2                  235     12     308.   73.4   4.79       299.
## # ℹ 6 more variables: CH4_ucl_95 <dbl>, CO2_mean <dbl>, CO2_sd <dbl>,
## #   CO2_se <dbl>, CO2_lcl_95 <dbl>, CO2_ucl_95 <dbl>

cat("\n=== Mean CH4 and CO2 by Period (Overall) ===\n")

## 
## === Mean CH4 and CO2 by Period (Overall) ===

print(period_means_overall, n = nrow(period_means_overall))

## # A tibble: 5 × 13
##   period_id n_obs n_cows CH4_mean CH4_sd CH4_se CH4_lcl_95 CH4_ucl_95 CO2_mean
##       <int> <int>  <int>    <dbl>  <dbl>  <dbl>      <dbl>      <dbl>    <dbl>
## 1         1   663     27     347.   91.2   3.54       340.       354.   11325.
## 2         2   238     13     322.   95.8   6.21       310.       335.   10891.
## 3         3   468     23     328.   90.0   4.16       320.       336.   10490.
## 4         4   819     33     334.   80.4   2.81       328.       339.   10652.
## 5         5   571     28     288.   84.5   3.54       281.       295.    9773.
## # ℹ 4 more variables: CO2_sd <dbl>, CO2_se <dbl>, CO2_lcl_95 <dbl>,
## #   CO2_ucl_95 <dbl>

# Save CSV
write_csv(period_means, "period_means_CH4_CO2_by_treatment_15v4bins.csv")
write_csv(period_means_overall, "period_means_CH4_CO2_overall_15v4bins.csv")

# Save Excel
write_xlsx(
  list(
    period_by_treatment = period_means,
    period_overall = period_means_overall
  ),
  "period_means_CH4_CO2_15v4bins.xlsx"
)

# Compare and choose the best Covariance structure

library(readr)
library(dplyr)
library(nlme)

# ---------------------------------------
# Covariance structure comparison (15v4bins)
# Includes corrected AR1 with within-cow index
# ---------------------------------------

dat <- read_csv("analysis_3SD_14day_passed_only_15v_4bins.csv", show_col_types = FALSE) %>%
  mutate(
    cow_id = factor(cow_id),
    grazing_treatment = factor(toupper(trimws(grazing_treatment)), levels = c("K1", "K2")),
    period_id = factor(period_id, levels = as.character(1:5)),
    start_time_num = as.numeric(start_time),
    CH4_g_d = as.numeric(CH4_g_d),
    CO2_g_d = as.numeric(CO2_g_d)
  ) %>%
  filter(
    !is.na(cow_id), !is.na(grazing_treatment), !is.na(period_id),
    is.finite(start_time_num), is.finite(CH4_g_d), is.finite(CO2_g_d)
  ) %>%
  arrange(cow_id, start_time_num) %>%
  group_by(cow_id) %>%
  mutate(obs_index = row_number()) %>%   # AR1-safe unique integer index within cow
  ungroup()

calc_caic <- function(mod) {
  ll <- as.numeric(logLik(mod))
  np <- attr(logLik(mod), "df")
  n <- nrow(mod$data)
  nf <- length(fixef(mod))
  -2 * ll + np * (log(n - nf) + 1)
}

fit_cov_set <- function(resp) {
  fml <- as.formula(paste0(resp, " ~ grazing_treatment + period_id + grazing_treatment:period_id"))

  safe_fit <- function(expr) {
    tryCatch(expr, error = function(e) structure(list(err = conditionMessage(e)), class = "fit_err"))
  }

  mod_ar1 <- safe_fit(
    lme(
      fixed = fml,
      random = ~1 | cow_id,
      data = dat,
      correlation = corAR1(form = ~ obs_index | cow_id),
      method = "REML",
      na.action = na.exclude,
      control = lmeControl(msMaxIter = 200, opt = "optim")
    )
  )

  mod_cs <- safe_fit(
    lme(
      fixed = fml,
      random = ~1 | cow_id,
      data = dat,
      correlation = corCompSymm(form = ~ obs_index | cow_id),
      method = "REML",
      na.action = na.exclude,
      control = lmeControl(msMaxIter = 200, opt = "optim")
    )
  )

  mod_pow <- safe_fit(
    lme(
      fixed = fml,
      random = ~1 | cow_id,
      data = dat,
      correlation = corExp(form = ~ start_time_num | cow_id),
      method = "REML",
      na.action = na.exclude,
      control = lmeControl(msMaxIter = 200, opt = "optim")
    )
  )

  mods <- list(AR1 = mod_ar1, CompSymm = mod_cs, SP_POW = mod_pow)

  out <- bind_rows(lapply(names(mods), function(s) {
    m <- mods[[s]]
    if (inherits(m, "fit_err")) {
      return(data.frame(
        response = resp, structure = s,
        AIC = NA, BIC = NA, CAIC = NA, logLik = NA, npar = NA,
        status = paste("failed:", m$err), stringsAsFactors = FALSE
      ))
    }
    data.frame(
      response = resp, structure = s,
      AIC = AIC(m), BIC = BIC(m), CAIC = calc_caic(m),
      logLik = as.numeric(logLik(m)),
      npar = attr(logLik(m), "df"),
      status = "ok",
      stringsAsFactors = FALSE
    )
  }))

  ok <- out %>% filter(status == "ok")
  if (nrow(ok) > 0) {
    out <- out %>%
      mutate(
        delta_AIC = ifelse(status == "ok", AIC - min(ok$AIC), NA_real_),
        delta_BIC = ifelse(status == "ok", BIC - min(ok$BIC), NA_real_),
        delta_CAIC = ifelse(status == "ok", CAIC - min(ok$CAIC), NA_real_)
      )
  } else {
    out$delta_AIC <- NA_real_
    out$delta_BIC <- NA_real_
    out$delta_CAIC <- NA_real_
  }

  out
}

res <- bind_rows(
  fit_cov_set("CH4_g_d"),
  fit_cov_set("CO2_g_d")
)

print(res)

##   response structure      AIC      BIC     CAIC    logLik npar status delta_AIC
## 1  CH4_g_d       AR1 31959.56 32036.51 32049.51 -15966.78   13     ok   0.00000
## 2  CH4_g_d  CompSymm 31989.27 32066.22 32079.22 -15981.63   13     ok  29.70721
## 3  CH4_g_d    SP_POW 31987.14 32064.09 32077.09 -15980.57   13     ok  27.58209
## 4  CO2_g_d       AR1 47101.83 47178.78 47191.78 -23537.92   13     ok   0.00000
## 5  CO2_g_d  CompSymm 47155.22 47232.17 47245.17 -23564.61   13     ok  53.39202
## 6  CO2_g_d    SP_POW 47155.22 47232.17 47245.17 -23564.61   13     ok  53.38580
##   delta_BIC delta_CAIC
## 1   0.00000    0.00000
## 2  29.70721   29.70721
## 3  27.58209   27.58209
## 4   0.00000    0.00000
## 5  53.39202   53.39202
## 6  53.38580   53.38580

cat("\nBest by CAIC among successful fits:\n")

## 
## Best by CAIC among successful fits:

print(
  res %>%
    filter(status == "ok") %>%
    group_by(response) %>%
    slice_min(order_by = CAIC, n = 1, with_ties = FALSE) %>%
    select(response, structure, AIC, BIC, CAIC)
)

## # A tibble: 2 × 5
## # Groups:   response [2]
##   response structure    AIC    BIC   CAIC
##   <chr>    <chr>      <dbl>  <dbl>  <dbl>
## 1 CH4_g_d  AR1       31960. 32037. 32050.
## 2 CO2_g_d  AR1       47102. 47179. 47192.

write_csv(res, "covariance_structure_comparison_15v4bins_with_AR1fix.csv")

#Check for model stability
library(readr)
library(dplyr)
library(nlme)

# -----------------------------
# Load filtered data (15v4bins)
# -----------------------------
dat <- read_csv("analysis_3SD_14day_passed_only_15v_4bins.csv", show_col_types = FALSE) %>%
  mutate(
    cow_id = factor(cow_id),
    grazing_treatment = factor(toupper(trimws(grazing_treatment)), levels = c("K1", "K2")),
    period_id = factor(period_id, levels = as.character(1:5)),
    start_time_num = as.numeric(start_time),
    CH4_g_d = as.numeric(CH4_g_d),
    CO2_g_d = as.numeric(CO2_g_d)
  ) %>%
  filter(
    !is.na(cow_id), !is.na(grazing_treatment), !is.na(period_id),
    is.finite(start_time_num), is.finite(CH4_g_d), is.finite(CO2_g_d)
  ) %>%
  arrange(cow_id, start_time_num) %>%
  group_by(cow_id) %>%
  mutate(obs_index = row_number()) %>%   # for AR1 within cow
  ungroup()

# -----------------------------
# Fit selected AR1 models
# -----------------------------
fit_ar1 <- function(resp) {
  fml <- as.formula(paste0(resp, " ~ grazing_treatment + period_id + grazing_treatment:period_id"))

  m <- lme(
    fixed = fml,
    random = ~1 | cow_id,
    data = dat,
    correlation = corAR1(form = ~ obs_index | cow_id),
    method = "REML",
    na.action = na.exclude,
    control = lmeControl(msMaxIter = 200, opt = "optim")
  )

  # Extract key diagnostics
  vc <- VarCorr(m)
  sigma_resid <- m$sigma
  ar1_phi <- as.numeric(coef(m$modelStruct$corStruct, unconstrained = FALSE))

  # Basic checks
  check_tbl <- tibble::tibble(
    response = resp,
    n_obs = nrow(dat),
    n_cows = dplyr::n_distinct(dat$cow_id),
    AIC = AIC(m),
    BIC = BIC(m),
    logLik = as.numeric(logLik(m)),
    residual_sigma = sigma_resid,
    ar1_phi = ar1_phi,
    ar1_in_valid_range = (ar1_phi > -1 & ar1_phi < 1),
    random_intercept_var = as.numeric(vc[1, "Variance"]),
    random_var_positive = as.numeric(vc[1, "Variance"]) > 0
  )

  list(model = m, checks = check_tbl)
}

ch4_fit <- fit_ar1("CH4_g_d")
co2_fit <- fit_ar1("CO2_g_d")

checks <- bind_rows(ch4_fit$checks, co2_fit$checks)

cat("=== AR1 MODEL STABILITY CHECKS ===\n")

## === AR1 MODEL STABILITY CHECKS ===

print(checks)

## # A tibble: 2 × 11
##   response n_obs n_cows    AIC    BIC  logLik residual_sigma ar1_phi
##   <chr>    <int>  <int>  <dbl>  <dbl>   <dbl>          <dbl>   <dbl>
## 1 CH4_g_d   2759     35 31960. 32037. -15967.           79.1   0.106
## 2 CO2_g_d   2759     35 47102. 47179. -23538.         1242.    0.141
## # ℹ 3 more variables: ar1_in_valid_range <lgl>, random_intercept_var <dbl>,
## #   random_var_positive <lgl>

cat("\n=== CH4 model summary ===\n")

## 
## === CH4 model summary ===

print(summary(ch4_fit$model))

## Linear mixed-effects model fit by REML
##   Data: dat 
##        AIC      BIC    logLik
##   31959.56 32036.51 -15966.78
## 
## Random effects:
##  Formula: ~1 | cow_id
##         (Intercept) Residual
## StdDev:    33.80667 79.09526
## 
## Correlation Structure: AR(1)
##  Formula: ~obs_index | cow_id 
##  Parameter estimate(s):
##       Phi 
## 0.1057705 
## Fixed effects:  list(fml) 
##                                   Value Std.Error   DF  t-value p-value
## (Intercept)                    326.4029  9.185077 2716 35.53622  0.0000
## grazing_treatmentK2             49.0961 13.608625   33  3.60772  0.0010
## period_id2                     -36.8234 13.518622 2716 -2.72390  0.0065
## period_id3                     -13.1066  8.486922 2716 -1.54432  0.1226
## period_id4                       2.2532  6.104921 2716  0.36908  0.7121
## period_id5                     -52.2423  6.860083 2716 -7.61540  0.0000
## grazing_treatmentK2:period_id2  -3.2627 15.905696 2716 -0.20513  0.8375
## grazing_treatmentK2:period_id3 -30.3006 11.219057 2716 -2.70082  0.0070
## grazing_treatmentK2:period_id4 -40.7164  9.620034 2716 -4.23246  0.0000
## grazing_treatmentK2:period_id5 -14.2470 10.306304 2716 -1.38236  0.1670
##  Correlation: 
##                                (Intr) grz_K2 prd_d2 prd_d3 prd_d4 prd_d5
## grazing_treatmentK2            -0.675                                   
## period_id2                     -0.167  0.113                            
## period_id3                     -0.269  0.181  0.223                     
## period_id4                     -0.407  0.274  0.255  0.410              
## period_id5                     -0.373  0.252  0.222  0.356  0.535       
## grazing_treatmentK2:period_id2  0.142 -0.219 -0.850 -0.189 -0.217 -0.189
## grazing_treatmentK2:period_id3  0.203 -0.320 -0.169 -0.756 -0.310 -0.269
## grazing_treatmentK2:period_id4  0.258 -0.397 -0.162 -0.260 -0.635 -0.339
## grazing_treatmentK2:period_id5  0.249 -0.355 -0.148 -0.237 -0.356 -0.666
##                                g_K2:_2 g_K2:_3 g_K2:_4
## grazing_treatmentK2                                   
## period_id2                                            
## period_id3                                            
## period_id4                                            
## period_id5                                            
## grazing_treatmentK2:period_id2                        
## grazing_treatmentK2:period_id3  0.293                 
## grazing_treatmentK2:period_id4  0.317   0.460         
## grazing_treatmentK2:period_id5  0.279   0.410   0.492 
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -3.46835653 -0.67306515 -0.02186649  0.62079946  3.52945008 
## 
## Number of Observations: 2759
## Number of Groups: 35

cat("\n=== CO2 model summary ===\n")

## 
## === CO2 model summary ===

print(summary(co2_fit$model))

## Linear mixed-effects model fit by REML
##   Data: dat 
##        AIC      BIC    logLik
##   47101.83 47178.78 -23537.92
## 
## Random effects:
##  Formula: ~1 | cow_id
##         (Intercept) Residual
## StdDev:    837.2731  1242.23
## 
## Correlation Structure: AR(1)
##  Formula: ~obs_index | cow_id 
##  Parameter estimate(s):
##       Phi 
## 0.1414196 
## Fixed effects:  list(fml) 
##                                    Value Std.Error   DF   t-value p-value
## (Intercept)                    11288.444 208.28320 2716  54.19758  0.0000
## grazing_treatmentK2              314.486 308.36924   33   1.01984  0.3152
## period_id2                      -361.216 219.77902 2716  -1.64354  0.1004
## period_id3                      -799.809 137.85735 2716  -5.80171  0.0000
## period_id4                      -625.351  99.30279 2716  -6.29741  0.0000
## period_id5                     -1574.090 111.79395 2716 -14.08028  0.0000
## grazing_treatmentK2:period_id2  -217.535 258.56148 2716  -0.84133  0.4002
## grazing_treatmentK2:period_id3  -283.765 182.42591 2716  -1.55551  0.1199
## grazing_treatmentK2:period_id4  -453.201 156.74402 2716  -2.89134  0.0039
## grazing_treatmentK2:period_id5   -65.925 167.76727 2716  -0.39295  0.6944
##  Correlation: 
##                                (Intr) grz_K2 prd_d2 prd_d3 prd_d4 prd_d5
## grazing_treatmentK2            -0.675                                   
## period_id2                     -0.119  0.080                            
## period_id3                     -0.192  0.129  0.225                     
## period_id4                     -0.292  0.198  0.253  0.409              
## period_id5                     -0.270  0.182  0.220  0.353  0.534       
## grazing_treatmentK2:period_id2  0.101 -0.157 -0.850 -0.191 -0.215 -0.187
## grazing_treatmentK2:period_id3  0.145 -0.230 -0.170 -0.756 -0.309 -0.267
## grazing_treatmentK2:period_id4  0.185 -0.287 -0.161 -0.259 -0.634 -0.339
## grazing_treatmentK2:period_id5  0.180 -0.256 -0.147 -0.235 -0.356 -0.666
##                                g_K2:_2 g_K2:_3 g_K2:_4
## grazing_treatmentK2                                   
## period_id2                                            
## period_id3                                            
## period_id4                                            
## period_id5                                            
## grazing_treatmentK2:period_id2                        
## grazing_treatmentK2:period_id3  0.295                 
## grazing_treatmentK2:period_id4  0.316   0.461         
## grazing_treatmentK2:period_id5  0.277   0.409   0.492 
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -3.45605348 -0.66720951 -0.05787612  0.66611221  3.81234532 
## 
## Number of Observations: 2759
## Number of Groups: 35

# Save checks
write_csv(checks, "ar1_model_stability_checks_15v4bins.csv")

# Check fixed effects using AR1 model
library(readr)
library(dplyr)
library(nlme)
library(emmeans)

# -----------------------------
# Load filtered data
# -----------------------------
dat <- read_csv("analysis_3SD_14day_passed_only_15v_4bins.csv", show_col_types = FALSE) %>%
  mutate(
    cow_id = factor(cow_id),
    grazing_treatment = factor(toupper(trimws(grazing_treatment)), levels = c("K1", "K2")),
    period_id = factor(period_id, levels = as.character(1:5)),
    start_time_num = as.numeric(start_time),
    CH4_g_d = as.numeric(CH4_g_d),
    CO2_g_d = as.numeric(CO2_g_d)
  ) %>%
  filter(
    !is.na(cow_id), !is.na(grazing_treatment), !is.na(period_id),
    is.finite(start_time_num), is.finite(CH4_g_d), is.finite(CO2_g_d)
  ) %>%
  arrange(cow_id, start_time_num) %>%
  group_by(cow_id) %>%
  mutate(obs_index = row_number()) %>%
  ungroup()

fit_ar1 <- function(resp) {
  fml <- as.formula(paste0(resp, " ~ grazing_treatment * period_id"))

  lme(
    fixed = fml,
    random = ~1 | cow_id,
    correlation = corAR1(form = ~ obs_index | cow_id),
    data = dat,
    method = "REML",
    na.action = na.exclude,
    control = lmeControl(msMaxIter = 200, opt = "optim")
  )
}

# -----------------------------
# Fit AR1 models
# -----------------------------
m_ch4 <- fit_ar1("CH4_g_d")
m_co2 <- fit_ar1("CO2_g_d")

# -----------------------------
# Fixed effects significance
# -----------------------------
cat("=== CH4: Fixed effects (Type-like ANOVA table) ===\n")

## === CH4: Fixed effects (Type-like ANOVA table) ===

print(anova(m_ch4))

##                             numDF denDF   F-value p-value
## (Intercept)                     1  2716 2901.7546  <.0001
## grazing_treatment               1    33    5.2967  0.0278
## period_id                       4  2716   37.0500  <.0001
## grazing_treatment:period_id     4  2716    5.3236  0.0003

cat("\n=== CO2: Fixed effects (Type-like ANOVA table) ===\n")

## 
## === CO2: Fixed effects (Type-like ANOVA table) ===

print(anova(m_co2))

##                             numDF denDF  F-value p-value
## (Intercept)                     1  2716 5375.457  <.0001
## grazing_treatment               1    33    0.353  0.5563
## period_id                       4  2716   96.904  <.0001
## grazing_treatment:period_id     4  2716    2.515  0.0397

# -----------------------------
# Interaction-focused contrasts
# K1 vs K2 within each period
# -----------------------------
cat("\n=== CH4: K1 vs K2 within each period ===\n")

## 
## === CH4: K1 vs K2 within each period ===

emm_ch4 <- emmeans(m_ch4, ~ grazing_treatment | period_id)
ch4_contr <- contrast(emm_ch4, method = "revpairwise", adjust = "tukey")
print(ch4_contr)

## period_id = 1:
##  contrast estimate   SE df t.ratio p.value
##  K2 - K1     49.10 13.6 33   3.608  0.0010
## 
## period_id = 2:
##  contrast estimate   SE df t.ratio p.value
##  K2 - K1     45.83 18.5 33   2.473  0.0187
## 
## period_id = 3:
##  contrast estimate   SE df t.ratio p.value
##  K2 - K1     18.80 14.6 33   1.287  0.2070
## 
## period_id = 4:
##  contrast estimate   SE df t.ratio p.value
##  K2 - K1      8.38 13.2 33   0.636  0.5294
## 
## period_id = 5:
##  contrast estimate   SE df t.ratio p.value
##  K2 - K1     34.85 13.9 33   2.516  0.0169
## 
## Degrees-of-freedom method: containment

cat("\n=== CO2: K1 vs K2 within each period ===\n")

## 
## === CO2: K1 vs K2 within each period ===

emm_co2 <- emmeans(m_co2, ~ grazing_treatment | period_id)
co2_contr <- contrast(emm_co2, method = "revpairwise", adjust = "tukey")
print(co2_contr)

## period_id = 1:
##  contrast estimate  SE df t.ratio p.value
##  K2 - K1     314.5 308 33   1.020  0.3152
## 
## period_id = 2:
##  contrast estimate  SE df t.ratio p.value
##  K2 - K1      97.0 370 33   0.262  0.7950
## 
## period_id = 3:
##  contrast estimate  SE df t.ratio p.value
##  K2 - K1      30.7 320 33   0.096  0.9241
## 
## period_id = 4:
##  contrast estimate  SE df t.ratio p.value
##  K2 - K1    -138.7 303 33  -0.458  0.6503
## 
## period_id = 5:
##  contrast estimate  SE df t.ratio p.value
##  K2 - K1     248.6 311 33   0.799  0.4300
## 
## Degrees-of-freedom method: containment

# -----------------------------
# Save tables
# -----------------------------
ch4_anova <- as.data.frame(anova(m_ch4))
co2_anova <- as.data.frame(anova(m_co2))
ch4_anova$term <- rownames(ch4_anova)
co2_anova$term <- rownames(co2_anova)

ch4_contr_df <- as.data.frame(ch4_contr)
co2_contr_df <- as.data.frame(co2_contr)

readr::write_csv(ch4_anova, "CH4_AR1_fixed_effects_anova.csv")
readr::write_csv(co2_anova, "CO2_AR1_fixed_effects_anova.csv")
readr::write_csv(ch4_contr_df, "CH4_AR1_K1vsK2_by_period.csv")
readr::write_csv(co2_contr_df, "CO2_AR1_K1vsK2_by_period.csv")

#Checking fixed effect using Type 3 ANOVA for K1 and K2.
library(readr)
library(dplyr)
library(gt)
library(openxlsx)

fmt_p <- function(p) ifelse(is.na(p), NA, ifelse(p < 0.001, "<0.001", sprintf("%.3f", p)))
sig_code <- function(p) case_when(
  is.na(p)  ~ "",
  p < 0.001 ~ "***",
  p < 0.01  ~ "**",
  p < 0.05  ~ "*",
  p < 0.10  ~ ".",
  TRUE      ~ ""
)

clean_nm <- function(x) {
  x |>
    tolower() |>
    gsub("[^a-z0-9]+", "_", x = _) |>
    gsub("^_|_$", "", x = _)
}

read_aov_clean <- function(path, response_label) {
  x <- read_csv(path, show_col_types = FALSE)

  # Keep original names, but create cleaned map for robust matching
  nm_orig <- names(x)
  nm_clean <- clean_nm(nm_orig)

  pick_col <- function(patterns) {
    idx <- which(vapply(patterns, function(p) any(grepl(p, nm_clean)), logical(1)))
    if (length(idx) == 0) return(NA_integer_)
    which(grepl(patterns[idx[1]], nm_clean))[1]
  }

  i_term  <- pick_col(c("^effect$", "^term$"))
  i_numdf <- pick_col(c("^numdf$", "^num_df$"))
  i_dendf <- pick_col(c("^dendf$", "^den_df$"))
  i_f     <- pick_col(c("^f_value$", "^f$"))
  i_p     <- pick_col(c("^p_value$", "^p$", "^pr_f$"))

  if (any(is.na(c(i_term, i_numdf, i_dendf, i_f, i_p)))) {
    cat("\nColumn detection failed for:", path, "\n")
    print(data.frame(original = nm_orig, cleaned = nm_clean))
    stop("Could not identify required ANOVA columns. See printed column map.")
  }

  x %>%
    mutate(response = response_label) %>%
    transmute(
      response = response,
      term    = .data[[nm_orig[i_term]]],
      num_df  = as.numeric(.data[[nm_orig[i_numdf]]]),
      den_df  = as.numeric(.data[[nm_orig[i_dendf]]]),
      F_value = as.numeric(.data[[nm_orig[i_f]]]),
      p_raw   = suppressWarnings(as.numeric(.data[[nm_orig[i_p]]])) # handles "<.0001" as NA
    ) %>%
    mutate(
      p_value = ifelse(is.na(p_raw), as.character(.data[[nm_orig[i_p]]]), fmt_p(p_raw)),
      sig = sig_code(p_raw)
    ) %>%
    select(-p_raw)
}

aov_tbl <- bind_rows(
  read_aov_clean("CH4_AR1_fixed_effects_anova.csv", "CH4_g_d"),
  read_aov_clean("CO2_AR1_fixed_effects_anova.csv", "CO2_g_d")
)

gt_aov <- aov_tbl %>%
  gt(groupname_col = "response") %>%
  tab_header(
    title = "Fixed Effects (AR1 Model)",
    subtitle = "Type-like ANOVA"
  ) %>%
  fmt_number(columns = c(num_df, den_df), decimals = 0) %>%
  fmt_number(columns = F_value, decimals = 3)

gtsave(gt_aov, "table_fixed_effects_anova.html")

wb <- createWorkbook()
addWorksheet(wb, "Fixed_Effects_ANOVA")
writeData(wb, "Fixed_Effects_ANOVA", aov_tbl)
saveWorkbook(wb, "table_fixed_effects_anova.xlsx", overwrite = TRUE)

print(aov_tbl)

## # A tibble: 8 × 7
##   response term                        num_df den_df  F_value p_value sig  
##   <chr>    <chr>                        <dbl>  <dbl>    <dbl> <chr>   <chr>
## 1 CH4_g_d  (Intercept)                      1   2716 2902.    <0.001  "***"
## 2 CH4_g_d  grazing_treatment                1     33    5.30  0.028   "*"  
## 3 CH4_g_d  period_id                        4   2716   37.1   <0.001  "***"
## 4 CH4_g_d  grazing_treatment:period_id      4   2716    5.32  <0.001  "***"
## 5 CO2_g_d  (Intercept)                      1   2716 5375.    <0.001  "***"
## 6 CO2_g_d  grazing_treatment                1     33    0.353 0.556   ""   
## 7 CO2_g_d  period_id                        4   2716   96.9   <0.001  "***"
## 8 CO2_g_d  grazing_treatment:period_id      4   2716    2.51  0.040   "*"

library(dplyr)
library(tibble)
library(readr)

anova_tbl <- tribble(
  ~response, ~term, ~num_df, ~den_df, ~F_value, ~p_value, ~sig,
  "CH4_g_d", "(Intercept)", 1, 2716, 2901.754598, "<0.001", "***",
  "CH4_g_d", "grazing_treatment", 1, 33, 5.296669551, "0.028", "*",
  "CH4_g_d", "period_id", 4, 2716, 37.05002376, "<0.001", "***",
  "CH4_g_d", "grazing_treatment:period_id", 4, 2716, 5.323578645, "<0.001", "***",
  "CO2_g_d", "(Intercept)", 1, 2716, 5375.456905, "<0.001", "***",
  "CO2_g_d", "grazing_treatment", 1, 33, 0.3533128301, "0.556", "",
  "CO2_g_d", "period_id", 4, 2716, 96.90369902, "<0.001", "***",
  "CO2_g_d", "grazing_treatment:period_id", 4, 2716, 2.514783258, "0.040", "*"
)

anova_tbl2 <- anova_tbl %>%
  mutate(
    p_num = suppressWarnings(as.numeric(gsub("^<", "", p_value))),
    interpretation = case_when(
      grepl("^<", p_value) ~ "Statistically significant",
      !is.na(p_num) & p_num < 0.05 ~ "Statistically significant",
      !is.na(p_num) & p_num >= 0.05 ~ "Not statistically significant",
      TRUE ~ "Check p-value format"
    )
  ) %>%
  select(-p_num)

print(anova_tbl2)

## # A tibble: 8 × 8
##   response term               num_df den_df F_value p_value sig   interpretation
##   <chr>    <chr>               <dbl>  <dbl>   <dbl> <chr>   <chr> <chr>         
## 1 CH4_g_d  (Intercept)             1   2716 2.90e+3 <0.001  "***" Statistically…
## 2 CH4_g_d  grazing_treatment       1     33 5.30e+0 0.028   "*"   Statistically…
## 3 CH4_g_d  period_id               4   2716 3.71e+1 <0.001  "***" Statistically…
## 4 CH4_g_d  grazing_treatment…      4   2716 5.32e+0 <0.001  "***" Statistically…
## 5 CO2_g_d  (Intercept)             1   2716 5.38e+3 <0.001  "***" Statistically…
## 6 CO2_g_d  grazing_treatment       1     33 3.53e-1 0.556   ""    Not statistic…
## 7 CO2_g_d  period_id               4   2716 9.69e+1 <0.001  "***" Statistically…
## 8 CO2_g_d  grazing_treatment…      4   2716 2.51e+0 0.040   "*"   Statistically…

write_csv(anova_tbl2, "anova_with_interpretation.csv")

library(dplyr)
library(stringr)

anova_tbl2 <- anova_tbl %>%
  mutate(
    p_num = suppressWarnings(as.numeric(str_replace(p_value, "^<", ""))),
    significance_level = case_when(
      str_detect(p_value, "^<0\\.001$") ~ "Highly significant",
      !is.na(p_num) & p_num < 0.001 ~ "Highly significant",
      !is.na(p_num) & p_num < 0.05  ~ "Moderately significant",
      !is.na(p_num) & p_num >= 0.05 ~ "Not significant",
      TRUE ~ "Check p-value format"
    )
  ) %>%
  select(-p_num)

print(anova_tbl2)

## # A tibble: 8 × 8
##   response term           num_df den_df F_value p_value sig   significance_level
##   <chr>    <chr>           <dbl>  <dbl>   <dbl> <chr>   <chr> <chr>             
## 1 CH4_g_d  (Intercept)         1   2716 2.90e+3 <0.001  "***" Highly significant
## 2 CH4_g_d  grazing_treat…      1     33 5.30e+0 0.028   "*"   Moderately signif…
## 3 CH4_g_d  period_id           4   2716 3.71e+1 <0.001  "***" Highly significant
## 4 CH4_g_d  grazing_treat…      4   2716 5.32e+0 <0.001  "***" Highly significant
## 5 CO2_g_d  (Intercept)         1   2716 5.38e+3 <0.001  "***" Highly significant
## 6 CO2_g_d  grazing_treat…      1     33 3.53e-1 0.556   ""    Not significant   
## 7 CO2_g_d  period_id           4   2716 9.69e+1 <0.001  "***" Highly significant
## 8 CO2_g_d  grazing_treat…      4   2716 2.51e+0 0.040   "*"   Moderately signif…

#Fit model (AR1)
library(readr)
library(dplyr)
library(nlme)
library(emmeans)
library(openxlsx)

# 1) Find input file
candidates <- c(
  "analysis_3SD_14day_15v4bins.csv",
  "analysis_3SD_14day_15v4_passed.csv",
  "analysis_3SD_14day_passed_only.csv"
)
input_file <- candidates[file.exists(candidates)][1]
if (is.na(input_file)) stop("No input file found.")
cat("Using file:", input_file, "\n")

## Using file: analysis_3SD_14day_passed_only.csv

# 2) Load + prep
dat <- read_csv(input_file, show_col_types = FALSE) %>%
  mutate(
    cow_id = factor(cow_id),
    grazing_treatment = factor(toupper(grazing_treatment), levels = c("K1", "K2")),
    period_id = factor(period_id),
    CH4_g_d = as.numeric(CH4_g_d),
    CO2_g_d = as.numeric(CO2_g_d)
  ) %>%
  group_by(cow_id) %>%
  arrange(start_time, .by_group = TRUE) %>%
  mutate(obs_index = row_number()) %>%
  ungroup() %>%
  filter(!is.na(grazing_treatment), !is.na(period_id))

# 3) Fit CH4 AR1
m_ch4 <- lme(
  fixed = CH4_g_d ~ grazing_treatment * period_id,
  random = ~ 1 | cow_id,
  correlation = corAR1(form = ~ obs_index | cow_id),
  data = dat,
  method = "REML",
  na.action = na.omit
)

# 4) Fit CO2 AR1
m_co2 <- lme(
  fixed = CO2_g_d ~ grazing_treatment * period_id,
  random = ~ 1 | cow_id,
  correlation = corAR1(form = ~ obs_index | cow_id),
  data = dat,
  method = "REML",
  na.action = na.omit
)

# 5) EMMs
emm_ch4 <- emmeans(m_ch4, ~ grazing_treatment | period_id)
emm_co2 <- emmeans(m_co2, ~ grazing_treatment | period_id)

emm_ch4_df <- as.data.frame(emm_ch4)
emm_co2_df <- as.data.frame(emm_co2)

# 6) K2 - K1 contrasts per period
ctr_ch4_df <- as.data.frame(contrast(emm_ch4, method = "revpairwise", adjust = "tukey"))
ctr_co2_df <- as.data.frame(contrast(emm_co2, method = "revpairwise", adjust = "tukey"))

# 7) Save
write_csv(emm_ch4_df, "CH4_AR1_emmeans_by_period_treatment.csv")
write_csv(emm_co2_df, "CO2_AR1_emmeans_by_period_treatment.csv")
write_csv(ctr_ch4_df, "CH4_AR1_K2_minus_K1_by_period_tukey.csv")
write_csv(ctr_co2_df, "CO2_AR1_K2_minus_K1_by_period_tukey.csv")

wb <- createWorkbook()
addWorksheet(wb, "CH4_emmeans")
addWorksheet(wb, "CO2_emmeans")
addWorksheet(wb, "CH4_K2_minus_K1")
addWorksheet(wb, "CO2_K2_minus_K1")
writeData(wb, "CH4_emmeans", emm_ch4_df)
writeData(wb, "CO2_emmeans", emm_co2_df)
writeData(wb, "CH4_K2_minus_K1", ctr_ch4_df)
writeData(wb, "CO2_K2_minus_K1", ctr_co2_df)
saveWorkbook(wb, "AR1_emmeans_and_contrasts_step1.xlsx", overwrite = TRUE)
cat("Done. Files saved.\n")

## Done. Files saved.

library(readr)
library(dplyr)
library(ggplot2)

# Read emmeans outputs from Step 1
ch4 <- read_csv("CH4_AR1_emmeans_by_period_treatment.csv", show_col_types = FALSE)
co2 <- read_csv("CO2_AR1_emmeans_by_period_treatment.csv", show_col_types = FALSE)

plot_emm <- function(df, ylab, title, out_png, col_k1 = "forestgreen", col_k2 = "steelblue") {
  d <- df %>%
    mutate(
      period_id = factor(period_id, levels = sort(unique(as.numeric(as.character(period_id))))),
      grazing_treatment = factor(grazing_treatment, levels = c("K1", "K2"))
    )

  p <- ggplot(d, aes(x = period_id, y = emmean, color = grazing_treatment, group = grazing_treatment)) +
    geom_line(linewidth = 1.1) +
    geom_point(size = 2.8) +
    geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.15, linewidth = 0.7) +
    scale_color_manual(values = c("K1" = col_k1, "K2" = col_k2)) +
    labs(
      title = title,
      x = "14-day period",
      y = ylab,
      color = "Treatment"
    ) +
    theme_minimal(base_size = 14) +
    theme(
      legend.position = "top",
      panel.grid.minor = element_blank(),
      plot.title = element_text(face = "bold")
    )

  ggsave(out_png, p, width = 9, height = 6, dpi = 320)
  p
}

p_ch4 <- plot_emm(
  ch4,
  ylab = "Adjusted mean CH4 (g/day)",
  title = "CH4 Adjusted Means by Period and Treatment (AR1)",
  out_png = "CH4_AR1_interaction_plot.png"
)

p_co2 <- plot_emm(
  co2,
  ylab = "Adjusted mean CO2 (g/day)",
  title = "CO2 Adjusted Means by Period and Treatment (AR1)",
  out_png = "CO2_AR1_interaction_plot.png"
)

print(p_ch4)

print(p_co2)

cat("Saved:\n- CH4_AR1_interaction_plot.png\n- CO2_AR1_interaction_plot.png\n")

## Saved:
## - CH4_AR1_interaction_plot.png
## - CO2_AR1_interaction_plot.png

library(readr)
library(dplyr)
library(openxlsx)

# Inputs from Step 1
emm_ch4 <- read_csv("CH4_AR1_emmeans_by_period_treatment.csv", show_col_types = FALSE)
emm_co2 <- read_csv("CO2_AR1_emmeans_by_period_treatment.csv", show_col_types = FALSE)
ctr_ch4 <- read_csv("CH4_AR1_K2_minus_K1_by_period_tukey.csv", show_col_types = FALSE)
ctr_co2 <- read_csv("CO2_AR1_K2_minus_K1_by_period_tukey.csv", show_col_types = FALSE)

make_effect_table <- function(emm_df, ctr_df, response_label) {
  k1 <- emm_df %>%
    filter(grazing_treatment == "K1") %>%
    select(period_id, K1_emmean = emmean)

  ci_mult <- qt(0.975, df = ctr_df$df)

  ctr_df %>%
    left_join(k1, by = "period_id") %>%
    mutate(
      response = response_label,
      lower.CL = estimate - ci_mult * SE,
      upper.CL = estimate + ci_mult * SE,
      pct_diff_vs_K1 = 100 * estimate / K1_emmean,
      sig = case_when(
        p.value < 0.001 ~ "***",
        p.value < 0.01  ~ "**",
        p.value < 0.05  ~ "*",
        p.value < 0.10  ~ ".",
        TRUE ~ ""
      ),
      significance = case_when(
        p.value < 0.001 ~ "Highly significant",
        p.value < 0.05  ~ "Moderately significant",
        TRUE ~ "Not significant"
      )
    ) %>%
    select(response, period_id, contrast, estimate, lower.CL, upper.CL, K1_emmean,
           pct_diff_vs_K1, SE, df, p.value, sig, significance) %>%
    arrange(period_id)
}

eff_ch4 <- make_effect_table(emm_ch4, ctr_ch4, "CH4_g_d")
eff_co2 <- make_effect_table(emm_co2, ctr_co2, "CO2_g_d")
eff_all <- bind_rows(eff_ch4, eff_co2)

print(eff_all)

## # A tibble: 10 × 13
##    response period_id contrast estimate lower.CL upper.CL K1_emmean
##    <chr>        <dbl> <chr>       <dbl>    <dbl>    <dbl>     <dbl>
##  1 CH4_g_d          1 K2 - K1     47.5     18.0      77.0      336.
##  2 CH4_g_d          2 K2 - K1     30.4    -23.0      83.9      300.
##  3 CH4_g_d          3 K2 - K1     37.1     -3.93     78.1      307.
##  4 CH4_g_d          4 K2 - K1      7.40   -23.2      38.0      333.
##  5 CH4_g_d          5 K2 - K1     59.9     26.6      93.1      272.
##  6 CO2_g_d          1 K2 - K1    259.    -412.      930.     11497.
##  7 CO2_g_d          2 K2 - K1    -20.5  -1004.      963.     11081.
##  8 CO2_g_d          3 K2 - K1    135.    -679.      949.     10553.
##  9 CO2_g_d          4 K2 - K1   -143.    -826.      541.     10748.
## 10 CO2_g_d          5 K2 - K1    565.    -151.     1281.      9733.
## # ℹ 6 more variables: pct_diff_vs_K1 <dbl>, SE <dbl>, df <dbl>, p.value <dbl>,
## #   sig <chr>, significance <chr>

write_csv(eff_ch4, "CH4_effect_sizes_K2_minus_K1_by_period.csv")
write_csv(eff_co2, "CO2_effect_sizes_K2_minus_K1_by_period.csv")
write_csv(eff_all, "effect_sizes_K2_minus_K1_by_period_all.csv")

wb <- createWorkbook()
addWorksheet(wb, "CH4_effect_sizes")
addWorksheet(wb, "CO2_effect_sizes")
addWorksheet(wb, "All")
writeData(wb, "CH4_effect_sizes", eff_ch4)
writeData(wb, "CO2_effect_sizes", eff_co2)
writeData(wb, "All", eff_all)
saveWorkbook(wb, "effect_sizes_K2_minus_K1_by_period.xlsx", overwrite = TRUE)

# Compute herd CH4 using TOD for K1 vs K2
library(readr)
library(dplyr)
library(openxlsx)

# Inputs (15v4 rule)
visits_file <- "analysis_3SD_with_14day_periods_4hr_bins.csv"
qual_file   <- "cow_period_retained_15v_4bins.csv"   # cow-periods that passed 15 visits + 4 bins

vis <- read_csv(visits_file, show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = toupper(as.character(grazing_treatment)),
    period_id = as.integer(period_id),
    CH4_g_d = as.numeric(CH4_g_d),
    time_bin_4hr = as.character(time_bin_4hr)
  ) %>%
  filter(is.finite(CH4_g_d), !is.na(time_bin_4hr))

qual <- read_csv(qual_file, show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = toupper(as.character(grazing_treatment)),
    period_id = as.integer(period_id)
  ) %>%
  distinct(cow_id, grazing_treatment, period_id)

# Keep only qualified cow-periods
dat <- vis %>%
  inner_join(qual, by = c("cow_id", "grazing_treatment", "period_id"))

# TOD method:
# 1) mean CH4 within each 4h bin per cow-period
bin_means <- dat %>%
  group_by(cow_id, grazing_treatment, period_id, time_bin_4hr) %>%
  summarise(
    n_visits_bin = n(),
    CH4_bin_mean = mean(CH4_g_d, na.rm = TRUE),
    .groups = "drop"
  )

# 2) mean of bin means per cow-period (equal weight per bin)
tod_cow_period <- bin_means %>%
  group_by(cow_id, grazing_treatment, period_id) %>%
  summarise(
    n_bins_used = n(),
    TOD_CH4_mean_period = mean(CH4_bin_mean, na.rm = TRUE),
    .groups = "drop"
  )

# 3) overall TOD mean CH4 per cow across its qualified periods
tod_cow_overall <- tod_cow_period %>%
  group_by(cow_id, grazing_treatment) %>%
  summarise(
    n_periods = n(),
    TOD_CH4_mean_overall = mean(TOD_CH4_mean_period, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(grazing_treatment, desc(TOD_CH4_mean_overall))

# Optional treatment summary
tod_treatment_summary <- tod_cow_overall %>%
  group_by(grazing_treatment) %>%
  summarise(
    n_cows = n(),
    mean_TOD_CH4 = mean(TOD_CH4_mean_overall, na.rm = TRUE),
    sd_TOD_CH4 = sd(TOD_CH4_mean_overall, na.rm = TRUE),
    .groups = "drop"
  )

cat("Qualified unique cows (15v4):", n_distinct(tod_cow_overall$cow_id), "\n")

## Qualified unique cows (15v4): 35

# Save
write_csv(bin_means, "TOD_CH4_bin_means_15v4.csv")
write_csv(tod_cow_period, "TOD_CH4_mean_by_cow_period_15v4.csv")
write_csv(tod_cow_overall, "TOD_CH4_mean_by_cow_overall_15v4.csv")
write_csv(tod_treatment_summary, "TOD_CH4_summary_by_treatment_15v4.csv")

wb <- createWorkbook()
addWorksheet(wb, "Bin_Means")
addWorksheet(wb, "Cow_Period_TOD")
addWorksheet(wb, "Cow_Overall_TOD")
addWorksheet(wb, "Treatment_Summary")
writeData(wb, "Bin_Means", bin_means)
writeData(wb, "Cow_Period_TOD", tod_cow_period)
writeData(wb, "Cow_Overall_TOD", tod_cow_overall)
writeData(wb, "Treatment_Summary", tod_treatment_summary)
saveWorkbook(wb, "TOD_CH4_results_15v4.xlsx", overwrite = TRUE)

print(head(tod_cow_overall, 10))

## # A tibble: 10 × 4
##    cow_id grazing_treatment n_periods TOD_CH4_mean_overall
##    <chr>  <chr>                 <int>                <dbl>
##  1 290G   K1                        3                 372.
##  2 266H   K1                        4                 366.
##  3 364B   K1                        3                 343.
##  4 312E   K1                        4                 342.
##  5 322F   K1                        3                 340.
##  6 454C   K1                        1                 331.
##  7 338J   K1                        3                 328.
##  8 410C   K1                        3                 325.
##  9 314J   K1                        5                 315.
## 10 370H   K1                        4                 309.

print(tod_treatment_summary)

## # A tibble: 2 × 4
##   grazing_treatment n_cows mean_TOD_CH4 sd_TOD_CH4
##   <chr>              <int>        <dbl>      <dbl>
## 1 K1                    19         311.       38.0
## 2 K2                    16         339.       37.1

# Estimate CH4 using AM for TOD data
library(readr)
library(dplyr)
library(openxlsx)

# Use the exact same qualified TOD dataset (15v4)
visits_file <- "analysis_3SD_with_14day_periods_4hr_bins.csv"
qual_file   <- "cow_period_retained_15v_4bins.csv"

vis <- read_csv(visits_file, show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = toupper(as.character(grazing_treatment)),
    period_id = as.integer(period_id),
    CH4_g_d = as.numeric(CH4_g_d)
  ) %>%
  filter(is.finite(CH4_g_d))

qual <- read_csv(qual_file, show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = toupper(as.character(grazing_treatment)),
    period_id = as.integer(period_id)
  ) %>%
  distinct(cow_id, grazing_treatment, period_id)

# Exact rows used for TOD eligibility
dat_tod <- vis %>%
  inner_join(qual, by = c("cow_id", "grazing_treatment", "period_id"))

# Herd-level AM CH4 from exact same rows
herd_am <- dat_tod %>%
  summarise(
    n_rows = n(),
    n_cows = n_distinct(cow_id),
    n_periods = n_distinct(period_id),
    CH4_mean_AM_herd = mean(CH4_g_d, na.rm = TRUE),
    CH4_sd_AM_herd = sd(CH4_g_d, na.rm = TRUE),
    CH4_se_AM_herd = CH4_sd_AM_herd / sqrt(n_rows),
    CH4_lcl_95 = CH4_mean_AM_herd - 1.96 * CH4_se_AM_herd,
    CH4_ucl_95 = CH4_mean_AM_herd + 1.96 * CH4_se_AM_herd
  )

# Optional by treatment
herd_am_by_trt <- dat_tod %>%
  group_by(grazing_treatment) %>%
  summarise(
    n_rows = n(),
    n_cows = n_distinct(cow_id),
    CH4_mean_AM = mean(CH4_g_d, na.rm = TRUE),
    CH4_sd_AM = sd(CH4_g_d, na.rm = TRUE),
    CH4_se_AM = CH4_sd_AM / sqrt(n_rows),
    CH4_lcl_95 = CH4_mean_AM - 1.96 * CH4_se_AM,
    CH4_ucl_95 = CH4_mean_AM + 1.96 * CH4_se_AM,
    .groups = "drop"
  )

print(herd_am)

## # A tibble: 1 × 8
##   n_rows n_cows n_periods CH4_mean_AM_herd CH4_sd_AM_herd CH4_se_AM_herd
##    <int>  <int>     <int>            <dbl>          <dbl>          <dbl>
## 1   2759     35         5             326.           89.4           1.70
## # ℹ 2 more variables: CH4_lcl_95 <dbl>, CH4_ucl_95 <dbl>

print(herd_am_by_trt)

## # A tibble: 2 × 8
##   grazing_treatment n_rows n_cows CH4_mean_AM CH4_sd_AM CH4_se_AM CH4_lcl_95
##   <chr>              <int>  <int>       <dbl>     <dbl>     <dbl>      <dbl>
## 1 K1                  1427     19        311.      87.4      2.31       306.
## 2 K2                  1332     16        341.      88.7      2.43       337.
## # ℹ 1 more variable: CH4_ucl_95 <dbl>

write_csv(herd_am, "herd_CH4_AM_from_TOD_exact_rows_15v4.csv")
write_csv(herd_am_by_trt, "herd_CH4_AM_by_treatment_from_TOD_exact_rows_15v4.csv")

wb <- createWorkbook()
addWorksheet(wb, "Herd_AM_Overall")
addWorksheet(wb, "Herd_AM_By_Treatment")
writeData(wb, "Herd_AM_Overall", herd_am)
writeData(wb, "Herd_AM_By_Treatment", herd_am_by_trt)
saveWorkbook(wb, "herd_CH4_AM_from_TOD_exact_rows_15v4.xlsx", overwrite = TRUE)

#Use LSMeans for the same estimation
library(readr)
library(dplyr)
library(lme4)
library(emmeans)

# -----------------------------
# 1) Load exact TOD-qualified 15v4 data used for model
# -----------------------------
vis <- read_csv("analysis_3SD_with_14day_periods_4hr_bins.csv", show_col_types = FALSE) %>%
  mutate(
    cow_id = factor(cow_id),
    grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
    period_id = factor(period_id),
    CH4_g_d = as.numeric(CH4_g_d)
  ) %>%
  filter(is.finite(CH4_g_d))

qual <- read_csv("cow_period_retained_15v_4bins.csv", show_col_types = FALSE) %>%
  mutate(
    cow_id = factor(cow_id),
    grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
    period_id = factor(period_id)
  ) %>%
  distinct(cow_id, grazing_treatment, period_id)

dat <- vis %>%
  inner_join(qual, by = c("cow_id","grazing_treatment","period_id")) %>%
  droplevels()

# -----------------------------
# 2) Fit model used for treatment LSMeans
# -----------------------------
m_ch4 <- lmer(
  CH4_g_d ~ grazing_treatment * period_id + (1 | cow_id),
  data = dat,
  REML = TRUE
)

# -----------------------------
# 3) Get treatment LSMeans
# -----------------------------
emm_trt <- as.data.frame(emmeans(m_ch4, ~ grazing_treatment))
print(emm_trt)

##  grazing_treatment   emmean       SE    df lower.CL upper.CL
##  K1                306.4828 8.481591 38.16 289.3151 323.6506
##  K2                337.6802 8.892241 32.81 319.5848 355.7756
## 
## Results are averaged over the levels of: period_id 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95

# -----------------------------
# 4) Add sample numbers (rows + cows) used in model
# -----------------------------
counts_by_trt <- dat %>%
  group_by(grazing_treatment) %>%
  summarise(
    n_rows = n(),
    n_cows = n_distinct(cow_id),
    .groups = "drop"
  )

emm_with_n <- emm_trt %>%
  left_join(counts_by_trt, by = "grazing_treatment")

cat("\n=== Sample size used for each treatment LSMean ===\n")

## 
## === Sample size used for each treatment LSMean ===

print(emm_with_n)

##  grazing_treatment   emmean       SE    df lower.CL upper.CL n_rows n_cows
##  K1                306.4828 8.481591 38.16 289.3151 323.6506   1427     19
##  K2                337.6802 8.892241 32.81 319.5848 355.7756   1332     16
## 
## Results are averaged over the levels of: period_id 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95

cat("\n=== Overall sample size used in model ===\n")

## 
## === Overall sample size used in model ===

cat("Total rows:", nrow(dat), "\n")

## Total rows: 2759

cat("Total cows:", n_distinct(dat$cow_id), "\n")

## Total cows: 35

cat("Total periods:", n_distinct(dat$period_id), "\n")

## Total periods: 5

# -----------------------------
# 5) Optional: period-wise counts
# -----------------------------
counts_trt_period <- dat %>%
  group_by(grazing_treatment, period_id) %>%
  summarise(
    n_rows = n(),
    n_cows = n_distinct(cow_id),
    .groups = "drop"
  )

cat("\n=== Counts by treatment x period ===\n")

## 
## === Counts by treatment x period ===

print(counts_trt_period)

## # A tibble: 10 × 4
##    grazing_treatment period_id n_rows n_cows
##    <fct>             <fct>      <int>  <int>
##  1 K1                1            354     16
##  2 K1                2             54      3
##  3 K1                3            162      9
##  4 K1                4            521     18
##  5 K1                5            336     16
##  6 K2                1            309     11
##  7 K2                2            184     10
##  8 K2                3            306     14
##  9 K2                4            298     15
## 10 K2                5            235     12

# -----------------------------
# 6) Save outputs
# -----------------------------
readr::write_csv(emm_with_n, "CH4_treatment_lsmeans_with_sample_sizes_15v4.csv")
readr::write_csv(counts_trt_period, "CH4_counts_by_treatment_period_15v4.csv")

# Compare the three methods
library(readr)
library(dplyr)
library(nlme)
library(emmeans)
library(tidyr)
library(openxlsx)

# -----------------------------
# Inputs
# -----------------------------
visits_file <- "analysis_3SD_with_14day_periods_4hr_bins.csv"
qual_file   <- "cow_period_retained_15v_4bins.csv"

# -----------------------------
# Load and align exact analysis rows
# -----------------------------
vis <- read_csv(visits_file, show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = toupper(as.character(grazing_treatment)),
    period_id = as.integer(period_id),
    CH4_g_d = as.numeric(CH4_g_d),
    time_bin_4hr = as.character(time_bin_4hr)
  ) %>%
  filter(is.finite(CH4_g_d), !is.na(grazing_treatment), !is.na(period_id))

qual <- read_csv(qual_file, show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = toupper(as.character(grazing_treatment)),
    period_id = as.integer(period_id)
  ) %>%
  distinct(cow_id, grazing_treatment, period_id)

dat <- vis %>%
  inner_join(qual, by = c("cow_id", "grazing_treatment", "period_id")) %>%
  mutate(
    grazing_treatment = factor(grazing_treatment, levels = c("K1","K2")),
    period_id = factor(period_id)
  ) %>%
  group_by(cow_id) %>%
  arrange(start_time, .by_group = TRUE) %>%
  mutate(obs_index = row_number()) %>%
  ungroup()

# -----------------------------
# Method 1: AM (visit-level arithmetic mean)
# -----------------------------
am_by_trt <- dat %>%
  group_by(grazing_treatment) %>%
  summarise(
    n_rows = n(),
    n_cows = n_distinct(cow_id),
    CH4_mean = mean(CH4_g_d, na.rm = TRUE),
    CH4_sd = sd(CH4_g_d, na.rm = TRUE),
    CH4_se = CH4_sd / sqrt(n_rows),
    CH4_lcl_95 = CH4_mean - 1.96 * CH4_se,
    CH4_ucl_95 = CH4_mean + 1.96 * CH4_se,
    .groups = "drop"
  ) %>%
  mutate(method = "AM") %>%
  select(method, grazing_treatment, n_cows, n_rows, CH4_mean, CH4_se, CH4_lcl_95, CH4_ucl_95)

# -----------------------------
# Method 2: TOD (bin means then average)
# -----------------------------
tod_by_trt <- dat %>%
  filter(!is.na(time_bin_4hr)) %>%
  group_by(cow_id, grazing_treatment, period_id, time_bin_4hr) %>%
  summarise(ch4_bin_mean = mean(CH4_g_d, na.rm = TRUE), .groups = "drop") %>%
  group_by(cow_id, grazing_treatment, period_id) %>%
  summarise(ch4_tod_period = mean(ch4_bin_mean, na.rm = TRUE), .groups = "drop") %>%
  group_by(grazing_treatment) %>%
  summarise(
    n_rows = n(),                  # cow-period rows
    n_cows = n_distinct(cow_id),
    CH4_mean = mean(ch4_tod_period, na.rm = TRUE),
    CH4_sd = sd(ch4_tod_period, na.rm = TRUE),
    CH4_se = CH4_sd / sqrt(n_rows),
    CH4_lcl_95 = CH4_mean - 1.96 * CH4_se,
    CH4_ucl_95 = CH4_mean + 1.96 * CH4_se,
    .groups = "drop"
  ) %>%
  mutate(method = "TOD") %>%
  select(method, grazing_treatment, n_cows, n_rows, CH4_mean, CH4_se, CH4_lcl_95, CH4_ucl_95)

# -----------------------------
# Method 3: LSMeans (AR1 model)
# -----------------------------
m_ar1 <- lme(
  CH4_g_d ~ grazing_treatment * period_id,
  random = ~ 1 | cow_id,
  correlation = corAR1(form = ~ obs_index | cow_id),
  data = dat,
  method = "REML",
  na.action = na.omit
)

counts_by_trt <- dat %>%
  group_by(grazing_treatment) %>%
  summarise(
    n_cows = n_distinct(cow_id),
    n_rows = n(),
    .groups = "drop"
  )

lsm_by_trt <- as.data.frame(emmeans(m_ar1, ~ grazing_treatment)) %>%
  mutate(grazing_treatment = as.character(grazing_treatment)) %>%
  left_join(
    counts_by_trt %>% mutate(grazing_treatment = as.character(grazing_treatment)),
    by = "grazing_treatment"
  ) %>%
  transmute(
    method = "LSMeans_AR1",
    grazing_treatment,
    n_cows,
    n_rows,
    CH4_mean = emmean,
    CH4_se = SE,
    CH4_lcl_95 = lower.CL,
    CH4_ucl_95 = upper.CL
  )

# -----------------------------
# Combine + comparison
# -----------------------------
method_table <- bind_rows(am_by_trt, tod_by_trt, lsm_by_trt) %>%
  arrange(grazing_treatment, method)

overall_table <- method_table %>%
  group_by(method) %>%
  summarise(
    CH4_mean_overall = mean(CH4_mean, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    diff_vs_AM = CH4_mean_overall - CH4_mean_overall[method == "AM"],
    pct_vs_AM = 100 * diff_vs_AM / CH4_mean_overall[method == "AM"]
  )

wide_compare <- method_table %>%
  select(method, grazing_treatment, CH4_mean) %>%
  pivot_wider(names_from = method, values_from = CH4_mean) %>%
  mutate(
    AM_minus_TOD = AM - TOD,
    AM_minus_LSM = AM - LSMeans_AR1,
    TOD_minus_LSM = TOD - LSMeans_AR1
  )

print(method_table)

## # A tibble: 6 × 8
##   method   grazing_treatment n_cows n_rows CH4_mean CH4_se CH4_lcl_95 CH4_ucl_95
##   <chr>    <chr>              <int>  <int>    <dbl>  <dbl>      <dbl>      <dbl>
## 1 AM       K1                    19   1427     311.   2.31       306.       315.
## 2 LSMeans… K1                    19   1427     306.   8.56       289.       324.
## 3 TOD      K1                    19     62     311.   5.79       299.       322.
## 4 AM       K2                    16   1332     341.   2.43       337.       346.
## 5 LSMeans… K2                    16   1332     338.   8.90       320.       356.
## 6 TOD      K2                    16     62     340.   5.55       330.       351.

print(overall_table)

## # A tibble: 3 × 4
##   method      CH4_mean_overall diff_vs_AM pct_vs_AM
##   <chr>                  <dbl>      <dbl>     <dbl>
## 1 AM                      326.      0         0    
## 2 LSMeans_AR1             322.     -3.96     -1.21 
## 3 TOD                     326.     -0.531    -0.163

print(wide_compare)

## # A tibble: 2 × 7
##   grazing_treatment    AM LSMeans_AR1   TOD AM_minus_TOD AM_minus_LSM
##   <chr>             <dbl>       <dbl> <dbl>        <dbl>        <dbl>
## 1 K1                 311.        306.  311.       0.0558         4.27
## 2 K2                 341.        338.  340.       1.01           3.64
## # ℹ 1 more variable: TOD_minus_LSM <dbl>

# -----------------------------
# Save
# -----------------------------
write_csv(method_table, "CH4_method_comparison_by_treatment_15v4.csv")
write_csv(overall_table, "CH4_method_comparison_overall_15v4.csv")
write_csv(wide_compare, "CH4_method_pairwise_differences_15v4.csv")

wb <- createWorkbook()
addWorksheet(wb, "By_Treatment")
addWorksheet(wb, "Overall")
addWorksheet(wb, "Pairwise_Diff")
writeData(wb, "By_Treatment", method_table)
writeData(wb, "Overall", overall_table)
writeData(wb, "Pairwise_Diff", wide_compare)
saveWorkbook(wb, "CH4_three_method_comparison_15v4.xlsx", overwrite = TRUE)

# Computes Pearson, Spearman, RMSE, Bland-Altman, and rank-shift:
library(readr)
library(dplyr)

# -----------------------------
# Load exact 15v4-qualified rows
# -----------------------------
vis <- read_csv("analysis_3SD_with_14day_periods_4hr_bins.csv", show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = toupper(as.character(grazing_treatment)),
    period_id = as.integer(period_id),
    CH4_g_d = as.numeric(CH4_g_d),
    time_bin_4hr = as.character(time_bin_4hr)
  ) %>%
  filter(is.finite(CH4_g_d))

qual <- read_csv("cow_period_retained_15v_4bins.csv", show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = toupper(as.character(grazing_treatment)),
    period_id = as.integer(period_id)
  ) %>%
  distinct(cow_id, grazing_treatment, period_id)

dat <- vis %>%
  inner_join(qual, by = c("cow_id", "grazing_treatment", "period_id"))

# -----------------------------
# AM cow means
# -----------------------------
cow_am <- dat %>%
  group_by(cow_id, grazing_treatment) %>%
  summarise(CH4_AM = mean(CH4_g_d, na.rm = TRUE), .groups = "drop")

# -----------------------------
# TOD cow means
# -----------------------------
cow_tod <- dat %>%
  filter(!is.na(time_bin_4hr)) %>%
  group_by(cow_id, grazing_treatment, period_id, time_bin_4hr) %>%
  summarise(ch4_bin_mean = mean(CH4_g_d, na.rm = TRUE), .groups = "drop") %>%
  group_by(cow_id, grazing_treatment, period_id) %>%
  summarise(ch4_tod_period = mean(ch4_bin_mean, na.rm = TRUE), .groups = "drop") %>%
  group_by(cow_id, grazing_treatment) %>%
  summarise(CH4_TOD = mean(ch4_tod_period, na.rm = TRUE), .groups = "drop")

# -----------------------------
# LSMeans-like cow adjusted means
# -----------------------------
cow_lsm <- read_csv("CH4_adjusted_means_by_cow_15v4.csv", show_col_types = FALSE) %>%
  transmute(
    cow_id = as.character(cow_id),
    grazing_treatment = toupper(as.character(grazing_treatment)),
    CH4_LSM = as.numeric(CH4_adjusted_mean_conditional)
  )

# -----------------------------
# Merge
# -----------------------------
cow_all <- cow_am %>%
  inner_join(cow_tod, by = c("cow_id", "grazing_treatment")) %>%
  inner_join(cow_lsm, by = c("cow_id", "grazing_treatment"))

# -----------------------------
# Pairwise agreement stats
# -----------------------------
pair_stats <- function(df, x, y, label) {
  xv <- df[[x]]
  yv <- df[[y]]
  d <- xv - yv
  data.frame(
    comparison = label,
    n = length(xv),
    pearson_r = cor(xv, yv, method = "pearson"),
    pearson_p = cor.test(xv, yv, method = "pearson")$p.value,
    spearman_rho = cor(xv, yv, method = "spearman"),
    spearman_p = cor.test(xv, yv, method = "spearman")$p.value,
    mean_diff = mean(d),
    sd_diff = sd(d),
    rmse = sqrt(mean(d^2)),
    loa_low = mean(d) - 1.96 * sd(d),
    loa_high = mean(d) + 1.96 * sd(d)
  )
}

stats_all <- bind_rows(
  pair_stats(cow_all, "CH4_AM",  "CH4_TOD", "AM vs TOD"),
  pair_stats(cow_all, "CH4_AM",  "CH4_LSM", "AM vs LSMeans"),
  pair_stats(cow_all, "CH4_TOD", "CH4_LSM", "TOD vs LSMeans")
)

# -----------------------------
# Rank shifts
# -----------------------------
rank_tbl <- cow_all %>%
  mutate(
    rank_AM = rank(-CH4_AM, ties.method = "average"),
    rank_TOD = rank(-CH4_TOD, ties.method = "average"),
    rank_LSM = rank(-CH4_LSM, ties.method = "average"),
    shift_AM_TOD = rank_AM - rank_TOD,
    shift_AM_LSM = rank_AM - rank_LSM,
    shift_TOD_LSM = rank_TOD - rank_LSM
  ) %>%
  arrange(rank_AM)

print(stats_all)

##       comparison  n pearson_r    pearson_p spearman_rho spearman_p   mean_diff
## 1      AM vs TOD 35 0.9837427 3.442751e-26    0.9675070          0 -0.08970823
## 2  AM vs LSMeans 35 0.9982950 2.657313e-42    0.9969188          0 -0.19600603
## 3 TOD vs LSMeans 35 0.9827106 9.433727e-26    0.9705882          0 -0.10629780
##    sd_diff     rmse    loa_low loa_high
## 1 7.122566 7.020651 -14.049938 13.87052
## 2 3.279855 3.238597  -6.624522  6.23251
## 3 7.886157 7.773408 -15.563165 15.35057

print(head(rank_tbl, 10))

## # A tibble: 10 × 11
##    cow_id grazing_treatment CH4_AM CH4_TOD CH4_LSM rank_AM rank_TOD rank_LSM
##    <chr>  <chr>              <dbl>   <dbl>   <dbl>   <dbl>    <dbl>    <dbl>
##  1 398G   K2                  399.    405.    392.       1        1        1
##  2 252H   K2                  385.    393.    383.       2        2        2
##  3 270G   K2                  382.    383.    380.       3        3        3
##  4 368J   K2                  370.    370.    368.       4        5        4
##  5 296H   K2                  364.    358.    363.       5        7        5
##  6 266H   K1                  359.    366.    356.       6        6        6
##  7 290G   K1                  359.    372.    356.       7        4        7
##  8 266D   K2                  356.    357.    356.       8        8        8
##  9 332H   K2                  353.    347.    352.       9        9        9
## 10 364B   K1                  348.    343.    346.      10       10       10
## # ℹ 3 more variables: shift_AM_TOD <dbl>, shift_AM_LSM <dbl>,
## #   shift_TOD_LSM <dbl>

write_csv(cow_all, "CH4_cow_level_AM_TOD_LSM_15v4.csv")
write_csv(stats_all, "CH4_method_agreement_stats_15v4.csv")
write_csv(rank_tbl, "CH4_method_rank_shifts_15v4.csv")

#Post-diagnostic check for LSmeans model framework

library(readr)
library(dplyr)
library(nlme)

# -----------------------------
# Load exact 15v4-qualified data
# -----------------------------
vis <- read_csv("analysis_3SD_with_14day_periods_4hr_bins.csv", show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
    period_id = factor(as.integer(period_id)),
    CH4_g_d = as.numeric(CH4_g_d),
    CO2_g_d = as.numeric(CO2_g_d)
  )

qual <- read_csv("cow_period_retained_15v_4bins.csv", show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
    period_id = factor(as.integer(period_id))
  ) %>%
  distinct(cow_id, grazing_treatment, period_id)

dat <- vis %>%
  inner_join(qual, by = c("cow_id","grazing_treatment","period_id")) %>%
  filter(is.finite(CH4_g_d), is.finite(CO2_g_d)) %>%
  group_by(cow_id) %>%
  arrange(start_time, .by_group = TRUE) %>%
  mutate(obs_index = row_number()) %>%
  ungroup()

fit_diag <- function(df, yvar, prefix) {
  fml <- as.formula(paste0(yvar, " ~ grazing_treatment * period_id"))

  # Main selected model (AR1 + random intercept cow)
  m <- lme(
    fixed = fml,
    random = ~1 | cow_id,
    correlation = corAR1(form = ~ obs_index | cow_id),
    data = df,
    method = "REML",
    na.action = na.omit
  )

  # For heterogeneity test, compare constant vs varIdent using ML
  m_const_ml <- update(m, method = "ML")
  m_het_ml <- update(m_const_ml, weights = varIdent(form = ~1 | grazing_treatment))
  lrt <- anova(m_const_ml, m_het_ml)

  # Residuals/fitted
  r <- residuals(m, type = "normalized")
  f <- fitted(m)

  # Shapiro (sample if needed)
  set.seed(42)
  r_sub <- if (length(r) > 5000) sample(r, 5000) else r
  sh <- shapiro.test(r_sub)

  # Outlier count using |std resid| > 3
  n_out3 <- sum(abs(r) > 3, na.rm = TRUE)

  # Save plots
  png(paste0(prefix, "_postdiag_residual_plots.png"), width = 1400, height = 1000, res = 130)
  par(mfrow = c(2,2))
  hist(r, breaks = "FD", col = "#4C78A8", border = "white",
       main = paste(prefix, "Residual Histogram"), xlab = "Normalized residual")
  qqnorm(r, pch = 16, cex = 0.5, main = paste(prefix, "Residual QQ Plot"))
  qqline(r, col = "red", lwd = 2)
  plot(f, r, pch = 16, cex = 0.5, col = "#2E7D32",
       main = paste(prefix, "Residuals vs Fitted"), xlab = "Fitted", ylab = "Normalized residual")
  abline(h = 0, col = "red", lwd = 2)
  plot(f, sqrt(abs(r)), pch = 16, cex = 0.5, col = "#1565C0",
       main = paste(prefix, "Scale-Location"), xlab = "Fitted", ylab = "sqrt(|normalized residual|)")
  dev.off()

  # Residual autocorrelation summary (lag 1)
  ac <- ACF(m, resType = "normalized")
  ac_df <- as.data.frame(ac)
  lag1 <- ac_df$ACF[ac_df$lag == 1][1]

  # Output table
  out <- data.frame(
    response = yvar,
    n_obs = length(r),
    shapiro_W = unname(sh$statistic),
    shapiro_p = sh$p.value,
    normality_flag = ifelse(sh$p.value > 0.05, "Normal", "Non-normal"),
    n_abs_std_resid_gt3 = n_out3,
    ar1_phi = coef(m$modelStruct$corStruct, unconstrained = FALSE),
    resid_acf_lag1 = lag1,
    AIC_const = AIC(m_const_ml),
    AIC_het = AIC(m_het_ml),
    LRT = as.numeric(lrt$L.Ratio[2]),
    p_hetero = as.numeric(lrt$`p-value`[2]),
    hetero_flag = ifelse(as.numeric(lrt$`p-value`[2]) < 0.05, "Heterogeneous variance", "No strong heterogeneity")
  )

  list(model = m, summary = out)
}

ch4 <- fit_diag(dat, "CH4_g_d", "CH4_AR1_15v4")
co2 <- fit_diag(dat, "CO2_g_d", "CO2_AR1_15v4")

diag_tbl <- bind_rows(ch4$summary, co2$summary)
print(diag_tbl)

##         response n_obs shapiro_W    shapiro_p normality_flag
## Phi...1  CH4_g_d  2759 0.9974874 0.0001848912     Non-normal
## Phi...2  CO2_g_d  2759 0.9985740 0.0177185337     Non-normal
##         n_abs_std_resid_gt3   ar1_phi resid_acf_lag1 AIC_const  AIC_het
## Phi...1                  12 0.1057703    -0.02140034  32018.07 32017.72
## Phi...2                   9 0.1414392    -0.02781654  47217.70 47219.70
##                 LRT  p_hetero             hetero_flag
## Phi...1 2.356965558 0.1247246 No strong heterogeneity
## Phi...2 0.005462756 0.9410816 No strong heterogeneity

write_csv(diag_tbl, "LSMeans_AR1_postdiagnostic_summary_15v4.csv")

#Plot residual plot:
library(readr)
library(dplyr)
library(nlme)

# -----------------------------
# Load exact 15v4-qualified data
# -----------------------------
vis <- read_csv("analysis_3SD_with_14day_periods_4hr_bins.csv", show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
    period_id = factor(as.integer(period_id)),
    CH4_g_d = as.numeric(CH4_g_d),
    CO2_g_d = as.numeric(CO2_g_d)
  )

qual <- read_csv("cow_period_retained_15v_4bins.csv", show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
    period_id = factor(as.integer(period_id))
  ) %>%
  distinct(cow_id, grazing_treatment, period_id)

dat <- vis %>%
  inner_join(qual, by = c("cow_id","grazing_treatment","period_id")) %>%
  filter(is.finite(CH4_g_d), is.finite(CO2_g_d)) %>%
  group_by(cow_id) %>%
  arrange(start_time, .by_group = TRUE) %>%
  mutate(obs_index = row_number()) %>%
  ungroup()

# -----------------------------
# Fit AR1 models
# -----------------------------
m_ch4 <- lme(
  CH4_g_d ~ grazing_treatment * period_id,
  random = ~1 | cow_id,
  correlation = corAR1(form = ~obs_index | cow_id),
  data = dat,
  method = "REML",
  na.action = na.omit
)

m_co2 <- lme(
  CO2_g_d ~ grazing_treatment * period_id,
  random = ~1 | cow_id,
  correlation = corAR1(form = ~obs_index | cow_id),
  data = dat,
  method = "REML",
  na.action = na.omit
)

# -----------------------------
# Residual plot helper
# -----------------------------
make_resid_plots <- function(model, prefix, color_pt = "#2E7D32") {
  r <- residuals(model, type = "normalized")
  f <- fitted(model)

  png(paste0(prefix, "_residual_diagnostics.png"), width = 1400, height = 1000, res = 140)
  par(mfrow = c(2,2))

  hist(r, breaks = "FD", col = "#4C78A8", border = "white",
       main = paste(prefix, "Residual Histogram"),
       xlab = "Normalized residual")

  qqnorm(r, pch = 16, cex = 0.55, main = paste(prefix, "QQ Plot"))
  qqline(r, col = "red", lwd = 2)

  plot(f, r, pch = 16, cex = 0.5, col = color_pt,
       main = paste(prefix, "Residuals vs Fitted"),
       xlab = "Fitted", ylab = "Normalized residual")
  abline(h = 0, col = "red", lwd = 2)

  plot(f, sqrt(abs(r)), pch = 16, cex = 0.5, col = "#1565C0",
       main = paste(prefix, "Scale-Location"),
       xlab = "Fitted", ylab = "sqrt(|normalized residual|)")

  dev.off()
}

make_resid_plots(m_ch4, "CH4_AR1_15v4", color_pt = "forestgreen")

## quartz_off_screen 
##                 2

make_resid_plots(m_co2, "CO2_AR1_15v4", color_pt = "steelblue")

## quartz_off_screen 
##                 2

cat("Saved:\n",
    "- CH4_AR1_15v4_residual_diagnostics.png\n",
    "- CO2_AR1_15v4_residual_diagnostics.png\n")

## Saved:
##  - CH4_AR1_15v4_residual_diagnostics.png
##  - CO2_AR1_15v4_residual_diagnostics.png

#Use BoxCox to validate transformation
library(readr)
library(dplyr)
library(nlme)

# -----------------------------
# Load exact 15v4-qualified data
# -----------------------------
vis <- read_csv("analysis_3SD_with_14day_periods_4hr_bins.csv", show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
    period_id = factor(as.integer(period_id)),
    CH4_g_d = as.numeric(CH4_g_d),
    CO2_g_d = as.numeric(CO2_g_d)
  )

qual <- read_csv("cow_period_retained_15v_4bins.csv", show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
    period_id = factor(as.integer(period_id))
  ) %>%
  distinct(cow_id, grazing_treatment, period_id)

dat <- vis %>%
  inner_join(qual, by = c("cow_id","grazing_treatment","period_id")) %>%
  filter(is.finite(CH4_g_d), is.finite(CO2_g_d)) %>%
  group_by(cow_id) %>%
  arrange(start_time, .by_group = TRUE) %>%
  mutate(obs_index = row_number()) %>%
  ungroup()

# -----------------------------
# Box-Cox profiling for AR1 mixed model
# -----------------------------
boxcox_transform <- function(y, lambda) {
  if (abs(lambda) < 1e-8) log(y) else (y^lambda - 1) / lambda
}

profile_boxcox_ar1 <- function(df, response, lambdas = seq(-1, 2, by = 0.02), prefix = "CH4") {
  y <- df[[response]]
  if (any(!is.finite(y)) || any(y <= 0)) {
    stop(response, " has non-positive or non-finite values; Box-Cox requires y > 0.")
  }

  sum_log_y <- sum(log(y))
  ll <- rep(NA_real_, length(lambdas))

  for (i in seq_along(lambdas)) {
    lam <- lambdas[i]
    yt <- boxcox_transform(y, lam)

    d <- df
    d$y_bc <- yt

    fit <- try(
      lme(
        fixed = y_bc ~ grazing_treatment * period_id,
        random = ~1 | cow_id,
        correlation = corAR1(form = ~obs_index | cow_id),
        data = d,
        method = "ML",
        na.action = na.omit,
        control = lmeControl(msMaxIter = 100, msMaxEval = 150)
      ),
      silent = TRUE
    )

    if (!inherits(fit, "try-error")) {
      # Box-Cox profile likelihood correction term
      ll[i] <- as.numeric(logLik(fit)) + (lam - 1) * sum_log_y
    }
  }

  ok <- is.finite(ll)
  lam_ok <- lambdas[ok]
  ll_ok <- ll[ok]

  lam_hat <- lam_ok[which.max(ll_ok)]
  ll_max <- max(ll_ok)

  # 95% CI by profile likelihood drop: 0.5 * qchisq(0.95, 1)
  cutoff <- ll_max - 0.5 * qchisq(0.95, df = 1)
  ci_vals <- lam_ok[ll_ok >= cutoff]
  ci_low <- min(ci_vals)
  ci_high <- max(ci_vals)

  res <- tibble(
    response = response,
    n = length(y),
    lambda_hat = lam_hat,
    lambda_ci_low = ci_low,
    lambda_ci_high = ci_high,
    lambda_ci_includes_1 = (ci_low <= 1 && ci_high >= 1),
    transform_required = ifelse(ci_low <= 1 && ci_high >= 1, "No", "Yes"),
    suggested_transform = case_when(
      abs(lam_hat) < 0.15 ~ "Log transform",
      TRUE ~ paste0("Box-Cox with lambda=", round(lam_hat, 2))
    )
  )

  # Save profile for plotting
  prof <- tibble(lambda = lam_ok, profile_logLik = ll_ok)
  write_csv(prof, paste0(prefix, "_boxcox_profile_AR1_15v4.csv"))

  # Plot profile
  png(paste0(prefix, "_boxcox_profile_AR1_15v4.png"), width = 1000, height = 700, res = 130)
  plot(lam_ok, ll_ok, type = "l", lwd = 2,
       xlab = expression(lambda), ylab = "Profile log-likelihood",
       main = paste0(prefix, " Box-Cox Profile (AR1 mixed model, 15v4)"))
  abline(v = lam_hat, col = "red", lwd = 2, lty = 2)
  abline(v = c(ci_low, ci_high), col = "blue", lwd = 2, lty = 3)
  abline(v = 1, col = "darkgreen", lwd = 2, lty = 3)
  legend("bottomright",
         legend = c(
           paste0("lambda_hat=", round(lam_hat, 2)),
           paste0("95% CI=[", round(ci_low, 2), ", ", round(ci_high, 2), "]"),
           "lambda=1 (no transform)"
         ),
         lty = c(2, 3, 3), lwd = 2, col = c("red", "blue", "darkgreen"), bty = "n")
  dev.off()

  res
}

res_ch4 <- profile_boxcox_ar1(dat, "CH4_g_d", prefix = "CH4")
res_co2 <- profile_boxcox_ar1(dat, "CO2_g_d", prefix = "CO2")

boxcox_summary <- bind_rows(res_ch4, res_co2)
print(boxcox_summary)

## # A tibble: 2 × 8
##   response     n lambda_hat lambda_ci_low lambda_ci_high lambda_ci_includes_1
##   <chr>    <int>      <dbl>         <dbl>          <dbl> <lgl>               
## 1 CH4_g_d   2759       0.86          0.78           0.94 FALSE               
## 2 CO2_g_d   2759       0.62          0.44           0.8  FALSE               
## # ℹ 2 more variables: transform_required <chr>, suggested_transform <chr>

write_csv(boxcox_summary, "boxcox_transform_decision_AR1_15v4.csv")

# Check if trasnformation improves model:
library(readr)
library(dplyr)
library(nlme)
library(emmeans)
library(openxlsx)

# -----------------------------
# Data (same 15v4 analysis set)
# -----------------------------
vis <- read_csv("analysis_3SD_with_14day_periods_4hr_bins.csv", show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
    period_id = factor(as.integer(period_id)),
    CH4_g_d = as.numeric(CH4_g_d),
    CO2_g_d = as.numeric(CO2_g_d)
  )

qual <- read_csv("cow_period_retained_15v_4bins.csv", show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
    period_id = factor(as.integer(period_id))
  ) %>%
  distinct(cow_id, grazing_treatment, period_id)

dat <- vis %>%
  inner_join(qual, by = c("cow_id","grazing_treatment","period_id")) %>%
  filter(is.finite(CH4_g_d), is.finite(CO2_g_d)) %>%
  group_by(cow_id) %>%
  arrange(start_time, .by_group = TRUE) %>%
  mutate(obs_index = row_number()) %>%
  ungroup()

# -----------------------------
# Box-Cox transform function
# -----------------------------
bc <- function(y, lambda) {
  if (abs(lambda) < 1e-8) log(y) else (y^lambda - 1) / lambda
}

# Lambdas from your Box-Cox profiling
lam_ch4 <- 0.86
lam_co2 <- 0.62

dat <- dat %>%
  mutate(
    CH4_bc = bc(CH4_g_d, lam_ch4),
    CO2_bc = bc(CO2_g_d, lam_co2)
  )

# -----------------------------
# Fit original and transformed models
# -----------------------------
fit_ar1 <- function(response, data) {
  lme(
    fixed = as.formula(paste0(response, " ~ grazing_treatment * period_id")),
    random = ~1 | cow_id,
    correlation = corAR1(form = ~ obs_index | cow_id),
    data = data,
    method = "REML",
    na.action = na.omit
  )
}

m_ch4_raw <- fit_ar1("CH4_g_d", dat)
m_ch4_bc  <- fit_ar1("CH4_bc", dat)
m_co2_raw <- fit_ar1("CO2_g_d", dat)
m_co2_bc  <- fit_ar1("CO2_bc", dat)

# -----------------------------
# Compare fixed effects conclusions
# -----------------------------
anova_tbl <- bind_rows(
  as.data.frame(anova(m_ch4_raw)) %>% mutate(model = "CH4_raw"),
  as.data.frame(anova(m_ch4_bc))  %>% mutate(model = "CH4_boxcox"),
  as.data.frame(anova(m_co2_raw)) %>% mutate(model = "CO2_raw"),
  as.data.frame(anova(m_co2_bc))  %>% mutate(model = "CO2_boxcox"),
  .id = "term"
)

# -----------------------------
# Compare K2-K1 by period (same contrasts)
# -----------------------------
get_ctr <- function(mod, label) {
  as.data.frame(
    contrast(emmeans(mod, ~ grazing_treatment | period_id), method = "revpairwise", adjust = "tukey")
  ) %>% mutate(model = label)
}

ctr_tbl <- bind_rows(
  get_ctr(m_ch4_raw, "CH4_raw"),
  get_ctr(m_ch4_bc,  "CH4_boxcox"),
  get_ctr(m_co2_raw, "CO2_raw"),
  get_ctr(m_co2_bc,  "CO2_boxcox")
)

# -----------------------------
# Residual diagnostics summary
# -----------------------------
diag_one <- function(mod, label) {
  r <- residuals(mod, type = "normalized")
  set.seed(42)
  r_sub <- if (length(r) > 5000) sample(r, 5000) else r
  sh <- shapiro.test(r_sub)
  data.frame(
    model = label,
    n = length(r),
    shapiro_W = unname(sh$statistic),
    shapiro_p = sh$p.value,
    normality_flag = ifelse(sh$p.value > 0.05, "Normal", "Non-normal"),
    n_abs_std_resid_gt3 = sum(abs(r) > 3, na.rm = TRUE),
    AIC = AIC(mod),
    BIC = BIC(mod)
  )
}

diag_tbl <- bind_rows(
  diag_one(m_ch4_raw, "CH4_raw"),
  diag_one(m_ch4_bc,  "CH4_boxcox"),
  diag_one(m_co2_raw, "CO2_raw"),
  diag_one(m_co2_bc,  "CO2_boxcox")
)

# -----------------------------
# Quick robustness flags
# -----------------------------
sig_terms <- function(mod) {
  a <- as.data.frame(anova(mod))
  tibble(
    treatment_sig = a["grazing_treatment","p-value"] < 0.05,
    period_sig = a["period_id","p-value"] < 0.05,
    interaction_sig = a["grazing_treatment:period_id","p-value"] < 0.05
  )
}

robust_tbl <- bind_rows(
  bind_cols(response = "CH4", scale = "raw",    sig_terms(m_ch4_raw)),
  bind_cols(response = "CH4", scale = "boxcox", sig_terms(m_ch4_bc)),
  bind_cols(response = "CO2", scale = "raw",    sig_terms(m_co2_raw)),
  bind_cols(response = "CO2", scale = "boxcox", sig_terms(m_co2_bc))
)

print(diag_tbl)

##        model    n shapiro_W    shapiro_p normality_flag n_abs_std_resid_gt3
## 1    CH4_raw 2759 0.9974874 0.0001848912     Non-normal                  12
## 2 CH4_boxcox 2759 0.9978276 0.0007202216     Non-normal                  18
## 3    CO2_raw 2759 0.9985740 0.0177185337     Non-normal                   9
## 4 CO2_boxcox 2759 0.9993079 0.4038794757         Normal                  10
##        AIC      BIC
## 1 31959.56 32036.51
## 2 27529.80 27606.75
## 3 47101.83 47178.78
## 4 27740.92 27817.86

print(robust_tbl)

## # A tibble: 4 × 5
##   response scale  treatment_sig period_sig interaction_sig
##   <chr>    <chr>  <lgl>         <lgl>      <lgl>          
## 1 CH4      raw    TRUE          TRUE       TRUE           
## 2 CH4      boxcox TRUE          TRUE       TRUE           
## 3 CO2      raw    FALSE         TRUE       TRUE           
## 4 CO2      boxcox FALSE         TRUE       TRUE

# -----------------------------
# Save all outputs
# -----------------------------
write_csv(anova_tbl,  "sensitivity_anova_raw_vs_boxcox.csv")
write_csv(ctr_tbl,    "sensitivity_contrasts_raw_vs_boxcox.csv")
write_csv(diag_tbl,   "sensitivity_diagnostics_raw_vs_boxcox.csv")
write_csv(robust_tbl, "sensitivity_robustness_flags_raw_vs_boxcox.csv")

wb <- createWorkbook()
addWorksheet(wb, "ANOVA")
addWorksheet(wb, "Contrasts")
addWorksheet(wb, "Diagnostics")
addWorksheet(wb, "Robustness_Flags")
writeData(wb, "ANOVA", anova_tbl)
writeData(wb, "Contrasts", ctr_tbl)
writeData(wb, "Diagnostics", diag_tbl)
writeData(wb, "Robustness_Flags", robust_tbl)
saveWorkbook(wb, "sensitivity_raw_vs_boxcox_AR1_15v4.xlsx", overwrite = TRUE)

# Add weight and age as covariates to test grazing effects:
library(readr)
library(readxl)
library(dplyr)
library(nlme)
library(emmeans)
library(openxlsx)

# -----------------------------
# Paths
# -----------------------------
base_dir <- "/Users/dolapo/Library/CloudStorage/GoogleDrive-daadepoj@ualberta.ca/Other computers/Dell/Academics/Python/ALES_Analysis/ALES_codex"
work_dir <- file.path(base_dir, "Feb13_emission_analysis")

# -----------------------------
# 1) Load 15v4 analysis data
# -----------------------------
vis <- read_csv(file.path(work_dir, "analysis_3SD_with_14day_periods_4hr_bins.csv"), show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
    period_id = factor(as.integer(period_id)),
    CH4_g_d = as.numeric(CH4_g_d),
    CO2_g_d = as.numeric(CO2_g_d)
  )

qual <- read_csv(file.path(work_dir, "cow_period_retained_15v_4bins.csv"), show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
    period_id = factor(as.integer(period_id))
  ) %>%
  distinct(cow_id, grazing_treatment, period_id)

dat <- vis %>%
  inner_join(qual, by = c("cow_id","grazing_treatment","period_id")) %>%
  filter(is.finite(CH4_g_d), is.finite(CO2_g_d)) %>%
  group_by(cow_id) %>%
  arrange(start_time, .by_group = TRUE) %>%
  mutate(obs_index = row_number()) %>%
  ungroup()

# -----------------------------
# 2) Load cow-level age and weight
# -----------------------------
age_tbl <- read_excel(file.path(base_dir, "K1_K2_Cows_With_Age.xlsx"), sheet = "K1_K2_Age_Data") %>%
  transmute(
    cow_id = as.character(cow_id),
    grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
    age_days = as.numeric(cow_age_in_days),
    age_years = as.numeric(cow_age_july_2025)
  )

wt_tbl <- read_excel(file.path(base_dir, "K1_K2_Cows_All_Weights.xlsx"), sheet = "K1_K2_All_Weights") %>%
  transmute(
    cow_id = as.character(cow_id),
    grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
    weight_day0_kg = as.numeric(weight_day0_kg),
    weight_mid_kg = as.numeric(weight_mid_kg),
    weight_end_kg = as.numeric(weight_end_kg),
    weight_mean_kg = rowMeans(cbind(weight_day0_kg, weight_mid_kg, weight_end_kg), na.rm = TRUE)
  )

cow_cov <- age_tbl %>%
  full_join(wt_tbl, by = c("cow_id","grazing_treatment"))

dat2 <- dat %>%
  left_join(cow_cov, by = c("cow_id","grazing_treatment"))

# QA
qa <- tibble(
  n_rows = nrow(dat2),
  n_cows = n_distinct(dat2$cow_id),
  missing_age_rows = sum(is.na(dat2$age_days)),
  missing_weight_rows = sum(is.na(dat2$weight_mean_kg))
)
print(qa)

## # A tibble: 1 × 4
##   n_rows n_cows missing_age_rows missing_weight_rows
##    <int>  <int>            <int>               <int>
## 1   2759     35                0                   0

# -----------------------------
# 3) Fit baseline and adjusted AR1 CH4 models
# -----------------------------
m0 <- lme(
  CH4_g_d ~ grazing_treatment * period_id,
  random = ~1 | cow_id,
  correlation = corAR1(form = ~ obs_index | cow_id),
  data = dat2,
  method = "REML",
  na.action = na.omit
)

m1 <- lme(
  CH4_g_d ~ grazing_treatment * period_id + scale(age_days) + scale(weight_mean_kg),
  random = ~1 | cow_id,
  correlation = corAR1(form = ~ obs_index | cow_id),
  data = dat2,
  method = "REML",
  na.action = na.omit
)

# -----------------------------
# 4) Compare fixed effects and fit
# -----------------------------
aov_m0 <- as.data.frame(anova(m0)) %>% mutate(model = "baseline")
aov_m1 <- as.data.frame(anova(m1)) %>% mutate(model = "age_weight_adjusted")
aov_all <- bind_rows(aov_m0, aov_m1, .id = "term_id")

fit_tbl <- tibble(
  model = c("baseline", "age_weight_adjusted"),
  AIC = c(AIC(m0), AIC(m1)),
  BIC = c(BIC(m0), BIC(m1)),
  logLik = c(as.numeric(logLik(m0)), as.numeric(logLik(m1)))
)

print(fit_tbl)

## # A tibble: 2 × 4
##   model                  AIC    BIC  logLik
##   <chr>                <dbl>  <dbl>   <dbl>
## 1 baseline            31960. 32037. -15967.
## 2 age_weight_adjusted 31943. 32032. -15957.

print(aov_m0)

##                             numDF denDF     F-value      p-value    model
## (Intercept)                     1  2716 2901.746247 0.0000000000 baseline
## grazing_treatment               1    33    5.296654 0.0278184960 baseline
## period_id                       4  2716   37.050044 0.0000000000 baseline
## grazing_treatment:period_id     4  2716    5.323581 0.0002865162 baseline

print(aov_m1)

##                             numDF denDF     F-value      p-value
## (Intercept)                     1  2716 3677.395522 0.0000000000
## grazing_treatment               1    31    6.771631 0.0140778639
## period_id                       4  2716   37.115385 0.0000000000
## scale(age_days)                 1    31    1.079869 0.3067645572
## scale(weight_mean_kg)           1    31    9.503630 0.0042826629
## grazing_treatment:period_id     4  2716    5.330185 0.0002831043
##                                           model
## (Intercept)                 age_weight_adjusted
## grazing_treatment           age_weight_adjusted
## period_id                   age_weight_adjusted
## scale(age_days)             age_weight_adjusted
## scale(weight_mean_kg)       age_weight_adjusted
## grazing_treatment:period_id age_weight_adjusted

# -----------------------------
# 5) Period-wise K2-K1 contrasts
# -----------------------------
ctr_m0 <- as.data.frame(
  contrast(emmeans(m0, ~ grazing_treatment | period_id), method = "revpairwise", adjust = "tukey")
) %>% mutate(model = "baseline")

ctr_m1 <- as.data.frame(
  contrast(emmeans(m1, ~ grazing_treatment | period_id), method = "revpairwise", adjust = "tukey")
) %>% mutate(model = "age_weight_adjusted")

ctr_all <- bind_rows(ctr_m0, ctr_m1)

print(ctr_all)

##    contrast period_id  estimate       SE df   t.ratio     p.value
## 1   K2 - K1         1 49.096135 13.60864 33 3.6077180 0.001008843
## 2   K2 - K1         2 45.833447 18.53361 33 2.4729912 0.018721156
## 3   K2 - K1         3 18.795533 14.60170 33 1.2872151 0.206974854
## 4   K2 - K1         4  8.379711 13.18213 33 0.6356871 0.529364449
## 5   K2 - K1         5 34.849093 13.85241 33 2.5157424 0.016924394
## 6   K2 - K1         1 44.994990 12.60780 31 3.5688209 0.001190974
## 7   K2 - K1         2 40.900153 17.79443 31 2.2984803 0.028439434
## 8   K2 - K1         3 14.524860 13.67916 31 1.0618238 0.296518157
## 9   K2 - K1         4  4.401621 12.14113 31 0.3625380 0.719410076
## 10  K2 - K1         5 31.579527 12.88182 31 2.4514801 0.020062370
##                  model
## 1             baseline
## 2             baseline
## 3             baseline
## 4             baseline
## 5             baseline
## 6  age_weight_adjusted
## 7  age_weight_adjusted
## 8  age_weight_adjusted
## 9  age_weight_adjusted
## 10 age_weight_adjusted

# -----------------------------
# 6) Save outputs
# -----------------------------
write_csv(qa, file.path(work_dir, "age_weight_join_QA_15v4.csv"))
write_csv(fit_tbl, file.path(work_dir, "CH4_AR1_fit_comparison_baseline_vs_age_weight_15v4.csv"))
write_csv(aov_all, file.path(work_dir, "CH4_AR1_anova_baseline_vs_age_weight_15v4.csv"))
write_csv(ctr_all, file.path(work_dir, "CH4_AR1_contrasts_baseline_vs_age_weight_15v4.csv"))

wb <- createWorkbook()
addWorksheet(wb, "QA")
addWorksheet(wb, "Fit_Comparison")
addWorksheet(wb, "ANOVA")
addWorksheet(wb, "Contrasts")
writeData(wb, "QA", qa)
writeData(wb, "Fit_Comparison", fit_tbl)
writeData(wb, "ANOVA", aov_all)
writeData(wb, "Contrasts", ctr_all)
saveWorkbook(wb, file.path(work_dir, "CH4_AR1_age_weight_adjustment_15v4.xlsx"), overwrite = TRUE)

library(tibble)
library(openxlsx)

compact <- tribble(
  ~Metric, ~Baseline, ~AgeWeight_Adjusted,
  "AIC", "31959.56", "31943.21",
  "Treatment p-value", "0.0278", "0.0141",
  "Period p-value", "<0.0001", "<0.0001",
  "Treatment×Period p-value", "0.0003", "0.0003",
  "Age p-value", "—", "0.3068",
  "Weight p-value", "—", "0.0043",
  "Significant K2-K1 periods", "3/5 (1,2,5)", "3/5 (1,2,5)"
)

print(compact)

## # A tibble: 7 × 3
##   Metric                    Baseline    AgeWeight_Adjusted
##   <chr>                     <chr>       <chr>             
## 1 AIC                       31959.56    31943.21          
## 2 Treatment p-value         0.0278      0.0141            
## 3 Period p-value            <0.0001     <0.0001           
## 4 Treatment×Period p-value  0.0003      0.0003            
## 5 Age p-value               —           0.3068            
## 6 Weight p-value            —           0.0043            
## 7 Significant K2-K1 periods 3/5 (1,2,5) 3/5 (1,2,5)

wb <- createWorkbook()
addWorksheet(wb, "Compact_Result")
writeData(wb, "Compact_Result", compact)
saveWorkbook(wb, "CH4_age_weight_compact_summary_15v4.xlsx", overwrite = TRUE)

library(tibble)
library(openxlsx)

compact <- tribble(
  ~Metric, ~Baseline, ~AgeWeight_Adjusted, ~Interpretation,
  "AIC", "31959.56", "31943.21", "Lower AIC after adjustment indicates better fit.",
  "Treatment p-value", "0.0278", "0.0141", "Treatment effect remains significant after adjusting for age and weight.",
  "Period p-value", "<0.0001", "<0.0001", "Strong period effect in both models.",
  "Treatment×Period p-value", "0.0003", "0.0003", "Interaction is stable; treatment differences vary by period in both models.",
  "Age p-value", "—", "0.3068", "Age is not a significant predictor in the adjusted model.",
  "Weight p-value", "—", "0.0043", "Weight is a significant covariate.",
  "Significant K2-K1 periods", "3/5 (1,2,5)", "3/5 (1,2,5)", "Period-level treatment contrast pattern is unchanged (robust)."
)

print(compact)

## # A tibble: 7 × 4
##   Metric                    Baseline    AgeWeight_Adjusted Interpretation       
##   <chr>                     <chr>       <chr>              <chr>                
## 1 AIC                       31959.56    31943.21           Lower AIC after adju…
## 2 Treatment p-value         0.0278      0.0141             Treatment effect rem…
## 3 Period p-value            <0.0001     <0.0001            Strong period effect…
## 4 Treatment×Period p-value  0.0003      0.0003             Interaction is stabl…
## 5 Age p-value               —           0.3068             Age is not a signifi…
## 6 Weight p-value            —           0.0043             Weight is a signific…
## 7 Significant K2-K1 periods 3/5 (1,2,5) 3/5 (1,2,5)        Period-level treatme…

wb <- createWorkbook()
addWorksheet(wb, "Compact_Result")
writeData(wb, "Compact_Result", compact)
saveWorkbook(wb, "CH4_age_weight_compact_summary_with_interpretation_15v4.xlsx", overwrite = TRUE)

# Checking grazing treatment actual contribution to variation
library(readr)
library(readxl)
library(dplyr)
library(nlme)
library(openxlsx)

# -----------------------------
# Paths
# -----------------------------
base_dir <- "/Users/dolapo/Library/CloudStorage/GoogleDrive-daadepoj@ualberta.ca/Other computers/Dell/Academics/Python/ALES_Analysis/ALES_codex"
work_dir <- file.path(base_dir, "Feb13_emission_analysis")

# -----------------------------
# Load analysis data (15v4)
# -----------------------------
vis <- read_csv(file.path(work_dir, "analysis_3SD_with_14day_periods_4hr_bins.csv"), show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
    period_id = factor(as.integer(period_id)),
    CH4_g_d = as.numeric(CH4_g_d)
  )

qual <- read_csv(file.path(work_dir, "cow_period_retained_15v_4bins.csv"), show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
    period_id = factor(as.integer(period_id))
  ) %>%
  distinct(cow_id, grazing_treatment, period_id)

dat <- vis %>%
  inner_join(qual, by = c("cow_id","grazing_treatment","period_id")) %>%
  filter(is.finite(CH4_g_d)) %>%
  group_by(cow_id) %>%
  arrange(start_time, .by_group = TRUE) %>%
  mutate(obs_index = row_number()) %>%
  ungroup()

# -----------------------------
# Load age + weight covariates
# -----------------------------
age_tbl <- read_excel(file.path(base_dir, "K1_K2_Cows_With_Age.xlsx"), sheet = "K1_K2_Age_Data") %>%
  transmute(
    cow_id = as.character(cow_id),
    grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
    age_days = as.numeric(cow_age_in_days)
  )

wt_tbl <- read_excel(file.path(base_dir, "K1_K2_Cows_All_Weights.xlsx"), sheet = "K1_K2_All_Weights") %>%
  transmute(
    cow_id = as.character(cow_id),
    grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
    weight_mean_kg = rowMeans(
      cbind(as.numeric(weight_day0_kg), as.numeric(weight_mid_kg), as.numeric(weight_end_kg)),
      na.rm = TRUE
    )
  )

dat2 <- dat %>%
  left_join(age_tbl, by = c("cow_id","grazing_treatment")) %>%
  left_join(wt_tbl,  by = c("cow_id","grazing_treatment"))

# -----------------------------
# Helper functions
# -----------------------------
fit_ar1_ml <- function(fml, data) {
  lme(
    fixed = fml,
    random = ~1 | cow_id,
    correlation = corAR1(form = ~obs_index | cow_id),
    data = data,
    method = "ML",
    na.action = na.omit
  )
}

pseudo_r2 <- function(mod) {
  y <- model.response(model.frame(formula(mod), data = getData(mod)))
  e <- resid(mod, type = "response")
  1 - var(e, na.rm = TRUE) / var(y, na.rm = TRUE)
}

# -----------------------------
# Fit models
# -----------------------------
m_full      <- fit_ar1_ml(CH4_g_d ~ grazing_treatment * period_id + scale(age_days) + scale(weight_mean_kg), dat2)
m_no_trt    <- fit_ar1_ml(CH4_g_d ~ period_id + scale(age_days) + scale(weight_mean_kg), dat2)
m_no_period <- fit_ar1_ml(CH4_g_d ~ grazing_treatment + scale(age_days) + scale(weight_mean_kg), dat2)
m_no_agewt  <- fit_ar1_ml(CH4_g_d ~ grazing_treatment * period_id, dat2)

R_full      <- pseudo_r2(m_full)
R_no_trt    <- pseudo_r2(m_no_trt)
R_no_period <- pseudo_r2(m_no_period)
R_no_agewt  <- pseudo_r2(m_no_agewt)

# Absolute contribution in explained variance
contrib_treatment <- R_full - R_no_trt
contrib_period    <- R_full - R_no_period
contrib_ageweight <- R_full - R_no_agewt

# Percent of explained variance attributable to treatment
pct_treatment_of_explained <- 100 * contrib_treatment / R_full
pct_other_of_explained     <- 100 - pct_treatment_of_explained
do

## function (.data, ...) 
## {
##     lifecycle::signal_stage("superseded", "do()")
##     UseMethod("do")
## }
## <bytecode: 0x11a3303e0>
## <environment: namespace:dplyr>

result_tbl <- tibble(
  metric = c(
    "R2_full",
    "R2_no_treatment",
    "R2_no_period",
    "R2_no_age_weight",
    "treatment_abs_contrib",
    "period_abs_contrib",
    "age_weight_abs_contrib",
    "treatment_pct_of_explained",
    "other_pct_of_explained"
  ),
  value = c(
    R_full,
    R_no_trt,
    R_no_period,
    R_no_agewt,
    contrib_treatment,
    contrib_period,
    contrib_ageweight,
    pct_treatment_of_explained,
    pct_other_of_explained
  )
)

print(result_tbl)

## # A tibble: 9 × 2
##   metric                         value
##   <chr>                          <dbl>
## 1 R2_full                     0.230   
## 2 R2_no_treatment             0.223   
## 3 R2_no_period                0.170   
## 4 R2_no_age_weight            0.230   
## 5 treatment_abs_contrib       0.00703 
## 6 period_abs_contrib          0.0591  
## 7 age_weight_abs_contrib     -0.000480
## 8 treatment_pct_of_explained  3.06    
## 9 other_pct_of_explained     96.9

# Save
write_csv(result_tbl, file.path(work_dir, "CH4_contribution_treatment_vs_other_15v4.csv"))

wb <- createWorkbook()
addWorksheet(wb, "CH4_Contribution")
writeData(wb, "CH4_Contribution", result_tbl)
saveWorkbook(wb, file.path(work_dir, "CH4_contribution_treatment_vs_other_15v4.xlsx"), overwrite = TRUE)

# Check rotation frequency effect:
library(readr)
library(readxl)
library(dplyr)
library(nlme)
library(emmeans)
library(openxlsx)

base_dir <- "/Users/dolapo/Library/CloudStorage/GoogleDrive-daadepoj@ualberta.ca/Other computers/Dell/Academics/Python/ALES_Analysis/ALES_codex"
work_dir <- file.path(base_dir, "Feb13_emission_analysis")
excel_origin <- as.Date("1899-12-30")

dat_raw <- read_csv(file.path(work_dir, "analysis_3SD_enriched.csv"), show_col_types = FALSE)

dat <- dat_raw %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
    CH4_g_d = as.numeric(CH4_g_d),
    start_time = as.numeric(start_time),
    visit_date = if ("visit_date" %in% names(dat_raw)) as.Date(visit_date) else (excel_origin + floor(start_time))
  ) %>%
  filter(is.finite(CH4_g_d), !is.na(visit_date), !is.na(grazing_treatment)) %>%
  mutate(
    rotation_phase = case_when(
      visit_date >= as.Date("2025-06-27") & visit_date <= as.Date("2025-07-30") ~ "Cycle1_2to4d",
      visit_date >= as.Date("2025-07-31") & visit_date <= as.Date("2025-09-09") ~ "Cycle2_gt4d",
      TRUE ~ NA_character_
    ),
    rotation_phase = factor(rotation_phase, levels = c("Cycle1_2to4d", "Cycle2_gt4d")),
    day_index = as.numeric(visit_date - min(visit_date, na.rm = TRUE))
  ) %>%
  filter(!is.na(rotation_phase)) %>%
  group_by(cow_id) %>%
  arrange(start_time, .by_group = TRUE) %>%
  mutate(obs_index = row_number()) %>%
  ungroup()

age_tbl <- read_excel(file.path(base_dir, "K1_K2_Cows_With_Age.xlsx"), sheet = "K1_K2_Age_Data") %>%
  transmute(cow_id = as.character(cow_id),
            grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
            age_days = as.numeric(cow_age_in_days))

wt_tbl <- read_excel(file.path(base_dir, "K1_K2_Cows_All_Weights.xlsx"), sheet = "K1_K2_All_Weights") %>%
  transmute(cow_id = as.character(cow_id),
            grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
            weight_mean_kg = rowMeans(cbind(as.numeric(weight_day0_kg), as.numeric(weight_mid_kg), as.numeric(weight_end_kg)), na.rm = TRUE))

dat2 <- dat %>%
  left_join(age_tbl, by = c("cow_id","grazing_treatment")) %>%
  left_join(wt_tbl, by = c("cow_id","grazing_treatment"))

m_rot <- lme(
  CH4_g_d ~ grazing_treatment * rotation_phase + scale(age_days) + scale(weight_mean_kg) + scale(day_index),
  random = ~1 | cow_id,
  correlation = corAR1(form = ~obs_index | cow_id),
  data = dat2,
  method = "REML",
  na.action = na.omit
)

emm <- emmeans(m_rot, ~ grazing_treatment * rotation_phase)
did_df <- as.data.frame(contrast(emm, method = list("DiD_rotation_effect" = c(-1, 1, 1, -1))))
print(did_df)

##  contrast            estimate       SE   df t.ratio p.value
##  DiD_rotation_effect  29.3701 6.379368 3320   4.604  <.0001
## 
## Degrees-of-freedom method: containment

library(openxlsx)

wb <- createWorkbook()
addWorksheet(wb, "DiD_CH4")
writeData(wb, "DiD_CH4", did_df)
saveWorkbook(wb, "rotation_effect_DiD_CH4_after3SD.xlsx", overwrite = TRUE)

# Check visit distribution based on TOD

library(readr)
library(dplyr)
library(openxlsx)
library(ggplot2)

# -----------------------------
# Inputs
# -----------------------------
visits_file <- "analysis_3SD_with_14day_periods_4hr_bins.csv"
qual_file   <- "cow_period_retained_15v_4bins.csv"

# -----------------------------
# Load TOD-qualified rows (15v4)
# -----------------------------
vis <- read_csv(visits_file, show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = toupper(as.character(grazing_treatment)),
    period_id = as.integer(period_id),
    visit_date = as.Date(visit_date),
    time_bin_4hr = as.character(time_bin_4hr)
  )

qual <- read_csv(qual_file, show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = toupper(as.character(grazing_treatment)),
    period_id = as.integer(period_id)
  ) %>%
  distinct(cow_id, grazing_treatment, period_id)

dat <- vis %>%
  inner_join(qual, by = c("cow_id","grazing_treatment","period_id")) %>%
  filter(!is.na(grazing_treatment))

# -----------------------------
# 1) Overall visit counts by treatment
# -----------------------------
visits_overall <- dat %>%
  group_by(grazing_treatment) %>%
  summarise(
    total_visits = n(),
    n_cows = n_distinct(cow_id),
    n_periods = n_distinct(period_id),
    .groups = "drop"
  )

# -----------------------------
# 2) Visits by period x treatment
# -----------------------------
visits_by_period <- dat %>%
  group_by(period_id, grazing_treatment) %>%
  summarise(
    total_visits = n(),
    n_cows = n_distinct(cow_id),
    visits_per_cow = total_visits / n_cows,
    .groups = "drop"
  ) %>%
  arrange(period_id, grazing_treatment)

# -----------------------------
# 3) Visits by 4h bin x treatment (TOD distribution)
# -----------------------------
visits_by_bin <- dat %>%
  group_by(grazing_treatment, time_bin_4hr) %>%
  summarise(total_visits = n(), .groups = "drop") %>%
  group_by(grazing_treatment) %>%
  mutate(
    pct_within_treatment = 100 * total_visits / sum(total_visits)
  ) %>%
  ungroup()

# -----------------------------
# 4) Daily visits by treatment
# -----------------------------
visits_by_day <- dat %>%
  group_by(visit_date, grazing_treatment) %>%
  summarise(n_visits = n(), .groups = "drop") %>%
  arrange(visit_date, grazing_treatment)

visits_by_day_summary <- visits_by_day %>%
  group_by(grazing_treatment) %>%
  summarise(
    n_days = n_distinct(visit_date),
    mean_visits_per_day = mean(n_visits),
    sd_visits_per_day = sd(n_visits),
    min_visits_per_day = min(n_visits),
    max_visits_per_day = max(n_visits),
    .groups = "drop"
  )

# -----------------------------
# 5) Plots
# -----------------------------
p_bin <- ggplot(visits_by_bin,
                aes(x = time_bin_4hr, y = total_visits, fill = grazing_treatment)) +
  geom_col(position = position_dodge(width = 0.8), width = 0.7) +
  scale_fill_manual(values = c("K1" = "forestgreen", "K2" = "steelblue")) +
  labs(title = "Visit Distribution by 4-hour TOD Bin",
       x = "4-hour bin", y = "Number of visits", fill = "Treatment") +
  theme_minimal(base_size = 13) +
  theme(axis.text.x = element_text(angle = 25, hjust = 1))

ggsave("TOD_visit_distribution_by_bin_K1_K2_15v4.png", p_bin, width = 9, height = 5.5, dpi = 320)

p_period <- ggplot(visits_by_period,
                   aes(x = factor(period_id), y = total_visits, fill = grazing_treatment)) +
  geom_col(position = position_dodge(width = 0.8), width = 0.7) +
  scale_fill_manual(values = c("K1" = "forestgreen", "K2" = "steelblue")) +
  labs(title = "Visit Distribution by 14-day Period",
       x = "Period", y = "Number of visits", fill = "Treatment") +
  theme_minimal(base_size = 13)

ggsave("TOD_visit_distribution_by_period_K1_K2_15v4.png", p_period, width = 8.5, height = 5.5, dpi = 320)

# -----------------------------
# 6) Save tables
# -----------------------------
write_csv(visits_overall, "TOD_visits_overall_by_treatment_15v4.csv")
write_csv(visits_by_period, "TOD_visits_by_period_treatment_15v4.csv")
write_csv(visits_by_bin, "TOD_visits_by_4hr_bin_treatment_15v4.csv")
write_csv(visits_by_day, "TOD_visits_by_day_treatment_15v4.csv")
write_csv(visits_by_day_summary, "TOD_visits_by_day_summary_treatment_15v4.csv")

wb <- createWorkbook()
addWorksheet(wb, "Overall")
addWorksheet(wb, "By_Period")
addWorksheet(wb, "By_4hr_Bin")
addWorksheet(wb, "By_Day")
addWorksheet(wb, "By_Day_Summary")
writeData(wb, "Overall", visits_overall)
writeData(wb, "By_Period", visits_by_period)
writeData(wb, "By_4hr_Bin", visits_by_bin)
writeData(wb, "By_Day", visits_by_day)
writeData(wb, "By_Day_Summary", visits_by_day_summary)
saveWorkbook(wb, "TOD_visit_distribution_K1_K2_15v4.xlsx", overwrite = TRUE)

print(visits_overall)

## # A tibble: 2 × 4
##   grazing_treatment total_visits n_cows n_periods
##   <chr>                    <int>  <int>     <int>
## 1 K1                        1427     19         5
## 2 K2                        1332     16         5

print(visits_by_period)

## # A tibble: 10 × 5
##    period_id grazing_treatment total_visits n_cows visits_per_cow
##        <int> <chr>                    <int>  <int>          <dbl>
##  1         1 K1                         354     16           22.1
##  2         1 K2                         309     11           28.1
##  3         2 K1                          54      3           18  
##  4         2 K2                         184     10           18.4
##  5         3 K1                         162      9           18  
##  6         3 K2                         306     14           21.9
##  7         4 K1                         521     18           28.9
##  8         4 K2                         298     15           19.9
##  9         5 K1                         336     16           21  
## 10         5 K2                         235     12           19.6

print(visits_by_bin)

## # A tibble: 12 × 4
##    grazing_treatment time_bin_4hr total_visits pct_within_treatment
##    <chr>             <chr>               <int>                <dbl>
##  1 K1                00:00-04:00            11                0.771
##  2 K1                04:00-08:00           362               25.4  
##  3 K1                08:00-12:00           258               18.1  
##  4 K1                12:00-16:00           292               20.5  
##  5 K1                16:00-20:00           392               27.5  
##  6 K1                20:00-24:00           112                7.85 
##  7 K2                00:00-04:00            53                3.98 
##  8 K2                04:00-08:00           202               15.2  
##  9 K2                08:00-12:00           354               26.6  
## 10 K2                12:00-16:00           321               24.1  
## 11 K2                16:00-20:00           275               20.6  
## 12 K2                20:00-24:00           127                9.53

print(visits_by_day_summary)

## # A tibble: 2 × 6
##   grazing_treatment n_days mean_visits_per_day sd_visits_per_day
##   <chr>              <int>               <dbl>             <dbl>
## 1 K1                    62                23.0             13.5 
## 2 K2                    70                19.0              9.28
## # ℹ 2 more variables: min_visits_per_day <int>, max_visits_per_day <int>

# Compare visit distribution

library(readr)
library(dplyr)
library(ggplot2)

# Input from your saved output
bin_df <- read_csv("TOD_visits_by_4hr_bin_treatment_15v4.csv", show_col_types = FALSE) %>%
  mutate(
    grazing_treatment = factor(grazing_treatment, levels = c("K1","K2")),
    time_bin_4hr = factor(
      time_bin_4hr,
      levels = c("00:00-04:00","04:00-08:00","08:00-12:00",
                 "12:00-16:00","16:00-20:00","20:00-24:00")
    )
  )

# 1) Compare visit COUNTS by bin (K1 vs K2)
p_count <- ggplot(bin_df, aes(x = time_bin_4hr, y = total_visits, fill = grazing_treatment)) +
  geom_col(position = position_dodge(width = 0.8), width = 0.7) +
  scale_fill_manual(values = c("K1" = "forestgreen", "K2" = "steelblue")) +
  labs(
    title = "TOD Visit Count Distribution: K1 vs K2 (15v4)",
    x = "4-hour time bin",
    y = "Visit count",
    fill = "Treatment"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold"),
    axis.text.x = element_text(angle = 25, hjust = 1)
  )

# 2) Compare visit PERCENT by bin (within each treatment)
p_pct <- ggplot(bin_df, aes(x = time_bin_4hr, y = pct_within_treatment, color = grazing_treatment, group = grazing_treatment)) +
  geom_line(linewidth = 1.1) +
  geom_point(size = 2.8) +
  scale_color_manual(values = c("K1" = "forestgreen", "K2" = "steelblue")) +
  labs(
    title = "TOD Visit Percentage Distribution: K1 vs K2 (15v4)",
    x = "4-hour time bin",
    y = "Percent of visits within treatment (%)",
    color = "Treatment"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold"),
    axis.text.x = element_text(angle = 25, hjust = 1)
  )

ggsave("TOD_compare_K1_K2_counts_15v4.png", p_count, width = 10, height = 6, dpi = 320)
ggsave("TOD_compare_K1_K2_percent_15v4.png", p_pct, width = 10, height = 6, dpi = 320)

print(p_count)

print(p_pct)

# Check visit distribution statistical significance
library(readr)
library(dplyr)

d <- read_csv("TOD_visits_by_4hr_bin_treatment_15v4.csv", show_col_types = FALSE)

tab <- xtabs(total_visits ~ grazing_treatment + time_bin_4hr, data = d)
print(tab)

##                  time_bin_4hr
## grazing_treatment 00:00-04:00 04:00-08:00 08:00-12:00 12:00-16:00 16:00-20:00
##                K1          11         362         258         292         392
##                K2          53         202         354         321         275
##                  time_bin_4hr
## grazing_treatment 20:00-24:00
##                K1         112
##                K2         127

chi <- chisq.test(tab)
print(chi)

## 
##  Pearson's Chi-squared test
## 
## data:  tab
## X-squared = 107.7, df = 5, p-value < 2.2e-16

cat("\nExpected counts:\n")

## 
## Expected counts:

print(chi$expected)

##                  time_bin_4hr
## grazing_treatment 00:00-04:00 04:00-08:00 08:00-12:00 12:00-16:00 16:00-20:00
##                K1    33.10185      291.71    316.5364    317.0536    344.9833
##                K2    30.89815      272.29    295.4636    295.9464    322.0167
##                  time_bin_4hr
## grazing_treatment 20:00-24:00
##                K1    123.6147
##                K2    115.3853

# Optional effect size
n <- sum(tab)
r <- nrow(tab)
c <- ncol(tab)
cramers_v <- sqrt(as.numeric(chi$statistic) / (n * min(r - 1, c - 1)))
cat("\nCramer's V:", cramers_v, "\n")

## 
## Cramer's V: 0.1975792

# If expected counts are small, run Fisher as sensitivity
if (any(chi$expected < 5)) {
  cat("\nSome expected counts < 5; running Fisher's exact test (may be slow)...\n")
  print(fisher.test(tab))
}

# Estimate CH4 and CO2 emissions based on TOD
library(readr)
library(dplyr)
library(ggplot2)
library(openxlsx)

# -----------------------------
# Inputs
# -----------------------------
visits_file <- "analysis_3SD_with_14day_periods_4hr_bins.csv"
qual_file   <- "cow_period_retained_15v_4bins.csv"

# -----------------------------
# Load TOD-qualified rows (15v4)
# -----------------------------
vis <- read_csv(visits_file, show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = toupper(as.character(grazing_treatment)),
    period_id = as.integer(period_id),
    CH4_g_d = as.numeric(CH4_g_d),
    CO2_g_d = as.numeric(CO2_g_d),
    time_bin_4hr = as.character(time_bin_4hr)
  )

qual <- read_csv(qual_file, show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = toupper(as.character(grazing_treatment)),
    period_id = as.integer(period_id)
  ) %>%
  distinct(cow_id, grazing_treatment, period_id)

dat <- vis %>%
  inner_join(qual, by = c("cow_id", "grazing_treatment", "period_id")) %>%
  filter(
    !is.na(time_bin_4hr),
    is.finite(CH4_g_d),
    is.finite(CO2_g_d)
  ) %>%
  mutate(
    grazing_treatment = factor(grazing_treatment, levels = c("K1","K2")),
    time_bin_4hr = factor(
      time_bin_4hr,
      levels = c("00:00-04:00","04:00-08:00","08:00-12:00",
                 "12:00-16:00","16:00-20:00","20:00-24:00")
    )
  )

# -----------------------------
# CH4 by bin x treatment
# -----------------------------
ch4_bin <- dat %>%
  group_by(grazing_treatment, time_bin_4hr) %>%
  summarise(
    n_visits = n(),
    CH4_mean = mean(CH4_g_d, na.rm = TRUE),
    CH4_sd = sd(CH4_g_d, na.rm = TRUE),
    CH4_se = CH4_sd / sqrt(n_visits),
    CH4_lcl_95 = CH4_mean - 1.96 * CH4_se,
    CH4_ucl_95 = CH4_mean + 1.96 * CH4_se,
    .groups = "drop"
  )

# -----------------------------
# CO2 by bin x treatment
# -----------------------------
co2_bin <- dat %>%
  group_by(grazing_treatment, time_bin_4hr) %>%
  summarise(
    n_visits = n(),
    CO2_mean = mean(CO2_g_d, na.rm = TRUE),
    CO2_sd = sd(CO2_g_d, na.rm = TRUE),
    CO2_se = CO2_sd / sqrt(n_visits),
    CO2_lcl_95 = CO2_mean - 1.96 * CO2_se,
    CO2_ucl_95 = CO2_mean + 1.96 * CO2_se,
    .groups = "drop"
  )

# -----------------------------
# Plot CH4
# -----------------------------
p_ch4 <- ggplot(ch4_bin, aes(x = time_bin_4hr, y = CH4_mean, color = grazing_treatment, group = grazing_treatment)) +
  geom_line(linewidth = 1.1) +
  geom_point(size = 2.8) +
  geom_errorbar(aes(ymin = CH4_lcl_95, ymax = CH4_ucl_95), width = 0.15, linewidth = 0.7) +
  scale_color_manual(values = c("K1" = "forestgreen", "K2" = "steelblue")) +
  labs(
    title = "CH4 Emission by 4-hour Time Bin (15v4)",
    x = "4-hour time bin",
    y = "Mean CH4 (g/day)",
    color = "Treatment"
  ) +
  theme_minimal(base_size = 14) +
  theme(plot.title = element_text(face = "bold"),
        axis.text.x = element_text(angle = 25, hjust = 1))

# -----------------------------
# Plot CO2
# -----------------------------
p_co2 <- ggplot(co2_bin, aes(x = time_bin_4hr, y = CO2_mean, color = grazing_treatment, group = grazing_treatment)) +
  geom_line(linewidth = 1.1) +
  geom_point(size = 2.8) +
  geom_errorbar(aes(ymin = CO2_lcl_95, ymax = CO2_ucl_95), width = 0.15, linewidth = 0.7) +
  scale_color_manual(values = c("K1" = "forestgreen", "K2" = "steelblue")) +
  labs(
    title = "CO2 Emission by 4-hour Time Bin (15v4)",
    x = "4-hour time bin",
    y = "Mean CO2 (g/day)",
    color = "Treatment"
  ) +
  theme_minimal(base_size = 14) +
  theme(plot.title = element_text(face = "bold"),
        axis.text.x = element_text(angle = 25, hjust = 1))

ggsave("CH4_by_timebin_K1_K2_15v4.png", p_ch4, width = 10, height = 6, dpi = 320)
ggsave("CO2_by_timebin_K1_K2_15v4.png", p_co2, width = 10, height = 6, dpi = 320)

print(p_ch4)

print(p_co2)

# -----------------------------
# Save tables
# -----------------------------
write_csv(ch4_bin, "CH4_by_timebin_treatment_15v4.csv")
write_csv(co2_bin, "CO2_by_timebin_treatment_15v4.csv")

wb <- createWorkbook()
addWorksheet(wb, "CH4_by_bin")
addWorksheet(wb, "CO2_by_bin")
writeData(wb, "CH4_by_bin", ch4_bin)
writeData(wb, "CO2_by_bin", co2_bin)
saveWorkbook(wb, "Emissions_by_timebin_K1_K2_15v4.xlsx", overwrite = TRUE)

# Check the reliabinlity of each time bin:

library(readr)
library(dplyr)
library(openxlsx)

# Inputs
visits_file <- "analysis_3SD_with_14day_periods_4hr_bins.csv"
qual_file   <- "cow_period_retained_15v_4bins.csv"

# Load TOD-qualified rows
vis <- read_csv(visits_file, show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = toupper(as.character(grazing_treatment)),
    period_id = as.integer(period_id),
    CH4_g_d = as.numeric(CH4_g_d),
    time_bin_4hr = as.character(time_bin_4hr)
  ) %>%
  filter(is.finite(CH4_g_d), !is.na(time_bin_4hr))

qual <- read_csv(qual_file, show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = toupper(as.character(grazing_treatment)),
    period_id = as.integer(period_id)
  ) %>%
  distinct(cow_id, grazing_treatment, period_id)

dat <- vis %>%
  inner_join(qual, by = c("cow_id","grazing_treatment","period_id"))

# Total cows per treatment in analysis
cow_totals <- dat %>%
  group_by(grazing_treatment) %>%
  summarise(total_cows = n_distinct(cow_id), .groups = "drop")

# Reliability by treatment x time bin
reliability <- dat %>%
  group_by(grazing_treatment, time_bin_4hr) %>%
  summarise(
    n_visits = n(),
    n_cows_contributing = n_distinct(cow_id),
    mean_CH4 = mean(CH4_g_d, na.rm = TRUE),
    sd_CH4 = sd(CH4_g_d, na.rm = TRUE),
    se_CH4 = sd_CH4 / sqrt(n_visits),
    cv_percent = 100 * sd_CH4 / mean_CH4,
    .groups = "drop"
  ) %>%
  left_join(cow_totals, by = "grazing_treatment") %>%
  mutate(
    cow_coverage_pct = 100 * n_cows_contributing / total_cows,
    reliability_flag = case_when(
      n_cows_contributing < 8 ~ "Low reliability",
      cow_coverage_pct < 50 ~ "Low reliability",
      n_visits < 30 ~ "Caution",
      TRUE ~ "Adequate"
    )
  ) %>%
  arrange(grazing_treatment, time_bin_4hr)

print(reliability)

## # A tibble: 12 × 11
##    grazing_treatment time_bin_4hr n_visits n_cows_contributing mean_CH4 sd_CH4
##    <chr>             <chr>           <int>               <int>    <dbl>  <dbl>
##  1 K1                00:00-04:00        11                   9     329.   79.3
##  2 K1                04:00-08:00       362                  19     317.   81.5
##  3 K1                08:00-12:00       258                  19     310.   86.6
##  4 K1                12:00-16:00       292                  19     293.   96.8
##  5 K1                16:00-20:00       392                  19     312.   85.8
##  6 K1                20:00-24:00       112                  19     330.   82.8
##  7 K2                00:00-04:00        53                  12     408.   82.8
##  8 K2                04:00-08:00       202                  16     325.   84.9
##  9 K2                08:00-12:00       354                  16     330.   88.4
## 10 K2                12:00-16:00       321                  16     341.   88.5
## 11 K2                16:00-20:00       275                  16     348.   85.9
## 12 K2                20:00-24:00       127                  15     360.   88.6
## # ℹ 5 more variables: se_CH4 <dbl>, cv_percent <dbl>, total_cows <int>,
## #   cow_coverage_pct <dbl>, reliability_flag <chr>

# Optional: midnight-specific check
midnight_check <- reliability %>% filter(time_bin_4hr == "00:00-04:00")
cat("\n=== Midnight bin reliability ===\n")

## 
## === Midnight bin reliability ===

print(midnight_check)

## # A tibble: 2 × 11
##   grazing_treatment time_bin_4hr n_visits n_cows_contributing mean_CH4 sd_CH4
##   <chr>             <chr>           <int>               <int>    <dbl>  <dbl>
## 1 K1                00:00-04:00        11                   9     329.   79.3
## 2 K2                00:00-04:00        53                  12     408.   82.8
## # ℹ 5 more variables: se_CH4 <dbl>, cv_percent <dbl>, total_cows <int>,
## #   cow_coverage_pct <dbl>, reliability_flag <chr>

write_csv(reliability, "CH4_timebin_reliability_15v4.csv")
write_csv(midnight_check, "CH4_midnight_reliability_15v4.csv")

wb <- createWorkbook()
addWorksheet(wb, "Reliability_All_Bins")
addWorksheet(wb, "Midnight_Only")
writeData(wb, "Reliability_All_Bins", reliability)
writeData(wb, "Midnight_Only", midnight_check)
saveWorkbook(wb, "CH4_timebin_reliability_15v4.xlsx", overwrite = TRUE)

#Filter for minimum of 2 visits per cow
library(readr)
library(dplyr)
library(openxlsx)

# Inputs
visits_file <- "analysis_3SD_with_14day_periods_4hr_bins.csv"
qual_file   <- "cow_period_retained_15v_4bins.csv"

# Load TOD-qualified rows
vis <- read_csv(visits_file, show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = toupper(as.character(grazing_treatment)),
    period_id = as.integer(period_id),
    CH4_g_d = as.numeric(CH4_g_d),
    time_bin_4hr = as.character(time_bin_4hr)
  ) %>%
  filter(is.finite(CH4_g_d), !is.na(time_bin_4hr))

qual <- read_csv(qual_file, show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = toupper(as.character(grazing_treatment)),
    period_id = as.integer(period_id)
  ) %>%
  distinct(cow_id, grazing_treatment, period_id)

dat <- vis %>%
  inner_join(qual, by = c("cow_id", "grazing_treatment", "period_id"))

# -----------------------------
# Require >=2 visits per cow within each time bin
# -----------------------------
cow_bin_counts <- dat %>%
  group_by(cow_id, grazing_treatment, time_bin_4hr) %>%
  summarise(n_visits_cow_bin = n(), .groups = "drop")

eligible_cow_bin <- cow_bin_counts %>%
  filter(n_visits_cow_bin >= 2)

dat_reliable <- dat %>%
  inner_join(
    eligible_cow_bin %>% select(cow_id, grazing_treatment, time_bin_4hr),
    by = c("cow_id", "grazing_treatment", "time_bin_4hr")
  )

# Coverage summary
coverage <- cow_bin_counts %>%
  group_by(grazing_treatment, time_bin_4hr) %>%
  summarise(
    n_cows_all = n_distinct(cow_id),
    n_cows_ge2 = n_distinct(cow_id[n_visits_cow_bin >= 2]),
    pct_cows_ge2 = 100 * n_cows_ge2 / n_cows_all,
    .groups = "drop"
  ) %>%
  arrange(grazing_treatment, time_bin_4hr)

# CH4 mean by bin after reliability filter
ch4_bin_reliable <- dat_reliable %>%
  group_by(grazing_treatment, time_bin_4hr) %>%
  summarise(
    n_visits = n(),
    n_cows_contributing = n_distinct(cow_id),
    CH4_mean = mean(CH4_g_d, na.rm = TRUE),
    CH4_sd = sd(CH4_g_d, na.rm = TRUE),
    CH4_se = CH4_sd / sqrt(n_visits),
    CH4_lcl_95 = CH4_mean - 1.96 * CH4_se,
    CH4_ucl_95 = CH4_mean + 1.96 * CH4_se,
    .groups = "drop"
  ) %>%
  arrange(grazing_treatment, time_bin_4hr)

# Print key outputs
cat("Rows before filter:", nrow(dat), "\n")

## Rows before filter: 2759

cat("Rows after >=2 visits/cow/bin filter:", nrow(dat_reliable), "\n\n")

## Rows after >=2 visits/cow/bin filter: 2747

cat("=== Cow coverage by bin (>=2 visits rule) ===\n")

## === Cow coverage by bin (>=2 visits rule) ===

print(coverage)

## # A tibble: 12 × 5
##    grazing_treatment time_bin_4hr n_cows_all n_cows_ge2 pct_cows_ge2
##    <chr>             <chr>             <int>      <int>        <dbl>
##  1 K1                00:00-04:00           9          2         22.2
##  2 K1                04:00-08:00          19         19        100  
##  3 K1                08:00-12:00          19         19        100  
##  4 K1                12:00-16:00          19         19        100  
##  5 K1                16:00-20:00          19         19        100  
##  6 K1                20:00-24:00          19         17         89.5
##  7 K2                00:00-04:00          12          9         75  
##  8 K2                04:00-08:00          16         16        100  
##  9 K2                08:00-12:00          16         16        100  
## 10 K2                12:00-16:00          16         16        100  
## 11 K2                16:00-20:00          16         16        100  
## 12 K2                20:00-24:00          15         15        100

cat("\n=== CH4 by bin after >=2 visits rule ===\n")

## 
## === CH4 by bin after >=2 visits rule ===

print(ch4_bin_reliable)

## # A tibble: 12 × 9
##    grazing_treatment time_bin_4hr n_visits n_cows_contributing CH4_mean CH4_sd
##    <chr>             <chr>           <int>               <int>    <dbl>  <dbl>
##  1 K1                00:00-04:00         4                   2     357.   97.5
##  2 K1                04:00-08:00       362                  19     317.   81.5
##  3 K1                08:00-12:00       258                  19     310.   86.6
##  4 K1                12:00-16:00       292                  19     293.   96.8
##  5 K1                16:00-20:00       392                  19     312.   85.8
##  6 K1                20:00-24:00       110                  17     331.   83.2
##  7 K2                00:00-04:00        50                   9     408.   84.0
##  8 K2                04:00-08:00       202                  16     325.   84.9
##  9 K2                08:00-12:00       354                  16     330.   88.4
## 10 K2                12:00-16:00       321                  16     341.   88.5
## 11 K2                16:00-20:00       275                  16     348.   85.9
## 12 K2                20:00-24:00       127                  15     360.   88.6
## # ℹ 3 more variables: CH4_se <dbl>, CH4_lcl_95 <dbl>, CH4_ucl_95 <dbl>

# Save
write_csv(coverage, "CH4_timebin_cow_coverage_ge2_15v4.csv")
write_csv(ch4_bin_reliable, "CH4_by_timebin_reliable_ge2visits_15v4.csv")

wb <- createWorkbook()
addWorksheet(wb, "Coverage_ge2")
addWorksheet(wb, "CH4_by_bin_ge2")
writeData(wb, "Coverage_ge2", coverage)
writeData(wb, "CH4_by_bin_ge2", ch4_bin_reliable)
saveWorkbook(wb, "CH4_timebin_reliable_ge2visits_15v4.xlsx", overwrite = TRUE)

library(readr)
library(dplyr)

x <- read_csv("CH4_by_timebin_reliable_ge2visits_15v4.csv", show_col_types = FALSE)

x2 <- x %>%
  mutate(
    CH4_CI95 = paste0("(", round(CH4_lcl_95, 1), ", ", round(CH4_ucl_95, 1), ")"),
    CI_width = CH4_ucl_95 - CH4_lcl_95,
    verdict = case_when(
      n_cows_contributing < 5 | n_visits < 20 ~ "Low reliability",
      n_cows_contributing < 10 | n_visits < 50 ~ "Moderate reliability",
      TRUE ~ "Adequate reliability"
    )
  ) %>%
  arrange(grazing_treatment, time_bin_4hr)

print(x2)

## # A tibble: 12 × 12
##    grazing_treatment time_bin_4hr n_visits n_cows_contributing CH4_mean CH4_sd
##    <chr>             <chr>           <dbl>               <dbl>    <dbl>  <dbl>
##  1 K1                00:00-04:00         4                   2     357.   97.5
##  2 K1                04:00-08:00       362                  19     317.   81.5
##  3 K1                08:00-12:00       258                  19     310.   86.6
##  4 K1                12:00-16:00       292                  19     293.   96.8
##  5 K1                16:00-20:00       392                  19     312.   85.8
##  6 K1                20:00-24:00       110                  17     331.   83.2
##  7 K2                00:00-04:00        50                   9     408.   84.0
##  8 K2                04:00-08:00       202                  16     325.   84.9
##  9 K2                08:00-12:00       354                  16     330.   88.4
## 10 K2                12:00-16:00       321                  16     341.   88.5
## 11 K2                16:00-20:00       275                  16     348.   85.9
## 12 K2                20:00-24:00       127                  15     360.   88.6
## # ℹ 6 more variables: CH4_se <dbl>, CH4_lcl_95 <dbl>, CH4_ucl_95 <dbl>,
## #   CH4_CI95 <chr>, CI_width <dbl>, verdict <chr>

write_csv(x2, "CH4_timebin_reliable_ge2visits_15v4_with_CI_verdict.csv")

#Emissions and visit distribution at herd level over 24 hours
library(readr)
library(dplyr)
library(ggplot2)
library(openxlsx)

# -----------------------------
# Inputs
# -----------------------------
visits_file <- "analysis_3SD_with_14day_periods_4hr_bins.csv"
qual_file   <- "cow_period_retained_15v_4bins.csv"

# -----------------------------
# Load TOD-qualified rows (15v4)
# -----------------------------
vis <- read_csv(visits_file, show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = toupper(as.character(grazing_treatment)),
    period_id = as.integer(period_id),
    CH4_g_d = as.numeric(CH4_g_d),
    hour_of_the_day = as.numeric(hour_of_the_day)
  ) %>%
  filter(is.finite(CH4_g_d), is.finite(hour_of_the_day))

qual <- read_csv(qual_file, show_col_types = FALSE) %>%
  mutate(
    cow_id = as.character(cow_id),
    grazing_treatment = toupper(as.character(grazing_treatment)),
    period_id = as.integer(period_id)
  ) %>%
  distinct(cow_id, grazing_treatment, period_id)

dat <- vis %>%
  inner_join(qual, by = c("cow_id", "grazing_treatment", "period_id")) %>%
  mutate(
    grazing_treatment = factor(grazing_treatment, levels = c("K1","K2")),
    hour_24 = as.integer(floor(hour_of_the_day))
  ) %>%
  filter(hour_24 >= 0, hour_24 <= 23)

# -----------------------------
# Visit distribution by hour (0-23)
# -----------------------------
visits_hour <- dat %>%
  group_by(grazing_treatment, hour_24) %>%
  summarise(
    n_visits = n(),
    n_cows = n_distinct(cow_id),
    .groups = "drop"
  ) %>%
  complete(grazing_treatment, hour_24 = 0:23, fill = list(n_visits = 0, n_cows = 0)) %>%
  group_by(grazing_treatment) %>%
  mutate(pct_visits = 100 * n_visits / sum(n_visits)) %>%
  ungroup()

# -----------------------------
# CH4 by hour (0-23)
# -----------------------------
ch4_hour <- dat %>%
  group_by(grazing_treatment, hour_24) %>%
  summarise(
    n_visits = n(),
    n_cows = n_distinct(cow_id),
    CH4_mean = mean(CH4_g_d, na.rm = TRUE),
    CH4_sd = sd(CH4_g_d, na.rm = TRUE),
    CH4_se = CH4_sd / sqrt(n_visits),
    CH4_lcl_95 = CH4_mean - 1.96 * CH4_se,
    CH4_ucl_95 = CH4_mean + 1.96 * CH4_se,
    .groups = "drop"
  ) %>%
  complete(grazing_treatment, hour_24 = 0:23)

# -----------------------------
# Plot 1: Visit distribution by hour
# -----------------------------
p_visits <- ggplot(visits_hour, aes(x = hour_24, y = n_visits, color = grazing_treatment)) +
  geom_line(linewidth = 1.05) +
  geom_point(size = 2) +
  scale_color_manual(values = c("K1" = "forestgreen", "K2" = "steelblue")) +
  scale_x_continuous(breaks = 0:23) +
  labs(
    title = "Visit Distribution Over 24 Hours (15v4)",
    x = "Hour of day (0-23)",
    y = "Visit count",
    color = "Treatment"
  ) +
  theme_minimal(base_size = 13) +
  theme(plot.title = element_text(face = "bold"))

# -----------------------------
# Plot 2: CH4 over 24 hours
# -----------------------------
p_ch4 <- ggplot(ch4_hour, aes(x = hour_24, y = CH4_mean, color = grazing_treatment, group = grazing_treatment)) +
  geom_line(linewidth = 1.05, na.rm = TRUE) +
  geom_point(size = 2, na.rm = TRUE) +
  geom_errorbar(aes(ymin = CH4_lcl_95, ymax = CH4_ucl_95), width = 0.2, linewidth = 0.6, na.rm = TRUE) +
  scale_color_manual(values = c("K1" = "forestgreen", "K2" = "steelblue")) +
  scale_x_continuous(breaks = 0:23) +
  labs(
    title = "CH4 Emissions Over 24 Hours (15v4)",
    x = "Hour of day (0-23)",
    y = "Mean CH4 (g/day)",
    color = "Treatment"
  ) +
  theme_minimal(base_size = 13) +
  theme(plot.title = element_text(face = "bold"))

ggsave("TOD_visits_by_hour_0_23_15v4.png", p_visits, width = 12, height = 5.5, dpi = 320)
ggsave("TOD_CH4_by_hour_0_23_15v4.png", p_ch4, width = 12, height = 5.5, dpi = 320)

print(p_visits)

print(p_ch4)

# -----------------------------
# Save tables
# -----------------------------
write_csv(visits_hour, "TOD_visits_by_hour_0_23_15v4.csv")
write_csv(ch4_hour, "TOD_CH4_by_hour_0_23_15v4.csv")

wb <- createWorkbook()
addWorksheet(wb, "Visits_by_hour")
addWorksheet(wb, "CH4_by_hour")
writeData(wb, "Visits_by_hour", visits_hour)
writeData(wb, "CH4_by_hour", ch4_hour)
saveWorkbook(wb, "TOD_CH4_and_visits_by_hour_0_23_15v4.xlsx", overwrite = TRUE)

Feb 13 Emissions Analysis Workflow

ALES Analysis

2026-02-13

Descriptive Statistics (Retained Observations)

Normality Diagnostics

Shapiro-Wilk by Treatment (K1 vs K2)

QQ Plots and Histograms