QQ Plots and Histograms
# Pre-diagnostics normality check
library(readr)
df <- read_csv("raw_data_for_diagnostics.csv", show_col_types = FALSE)
df$tr <- toupper(trimws(df$grazing_treatment))
cols <- c(K1 = "forestgreen", K2 = "steelblue")
plot_normality_by_trt <- function(trt, color) {
x_ch4 <- as.numeric(df[df$tr == trt, ][["CH4_g_d"]])
x_co2 <- as.numeric(df[df$tr == trt, ][["CO2_g_d"]])
x_ch4 <- x_ch4[is.finite(x_ch4)]
x_co2 <- x_co2[is.finite(x_co2)]
old_par <- par(no.readonly = TRUE)
on.exit(par(old_par))
par(mfrow = c(2, 2), mar = c(4, 4, 2, 1))
hist(x_ch4, breaks = "FD", col = color, border = "white",
main = paste(trt, "CH4 Histogram"), xlab = "CH4_g_d")
qqnorm(x_ch4, pch = 19, cex = 0.6, col = color,
main = paste(trt, "CH4 QQ Plot"))
qqline(x_ch4, col = "black", lwd = 2)
hist(x_co2, breaks = "FD", col = color, border = "white",
main = paste(trt, "CO2 Histogram"), xlab = "CO2_g_d")
qqnorm(x_co2, pch = 19, cex = 0.6, col = color,
main = paste(trt, "CO2 QQ Plot"))
qqline(x_co2, col = "black", lwd = 2)
}
# View K1 only
plot_normality_by_trt("K1", cols["K1"])

# View K2 only
plot_normality_by_trt("K2", cols["K2"])

# LSMeans Calculation for CH4
library(readr)
library(dplyr)
library(lme4)
library(lmerTest)
library(emmeans)
# -----------------------------
# Load after-3SD enriched data
# -----------------------------
dat <- read_csv("analysis_3SD_enriched.csv", show_col_types = FALSE)
hms_to_minutes <- function(x) {
x <- as.character(x)
p <- strsplit(x, ":", fixed = TRUE)
sapply(p, function(v) {
if (length(v) != 3) return(NA_real_)
as.numeric(v[1]) * 60 + as.numeric(v[2]) + as.numeric(v[3]) / 60
})
}
excel_origin <- as.Date("1899-12-30")
dat2 <- dat %>%
mutate(
cow_id = factor(cow_id),
grazing_treatment = factor(toupper(trimws(grazing_treatment))),
start_time = as.numeric(start_time),
CH4_g_d = as.numeric(CH4_g_d),
CO2_g_d = as.numeric(CO2_g_d),
hour_of_the_day = as.numeric(hour_of_the_day),
date = factor(excel_origin + floor(start_time)),
hour = factor(round(hour_of_the_day)),
duration_min = hms_to_minutes(emission_duration)
)
# -----------------------------
# Final LSMeans models (after 3SD)
# -----------------------------
d_ch4 <- dat2 %>%
filter(is.finite(CH4_g_d), is.finite(duration_min), !is.na(date), !is.na(hour))
d_co2 <- dat2 %>%
filter(is.finite(CO2_g_d), is.finite(duration_min), !is.na(date), !is.na(hour))
m_ch4 <- lmer(
CH4_g_d ~ cow_id + duration_min + (1 | cow_id:date) + (1 | cow_id:hour),
data = d_ch4,
REML = TRUE
)
m_co2 <- lmer(
CO2_g_d ~ cow_id + duration_min + (1 | cow_id:date) + (1 | cow_id:hour),
data = d_co2,
REML = TRUE
)
# -----------------------------
# Extract cow-level LSMeans
# -----------------------------
lsm_ch4 <- as.data.frame(summary(emmeans(m_ch4, ~ cow_id))) %>%
rename(
CH4_lsmean = emmean,
CH4_SE = SE,
CH4_LCL = asymp.LCL,
CH4_UCL = asymp.UCL
) %>%
select(cow_id, CH4_lsmean, CH4_SE, CH4_LCL, CH4_UCL)
lsm_co2 <- as.data.frame(summary(emmeans(m_co2, ~ cow_id))) %>%
rename(
CO2_lsmean = emmean,
CO2_SE = SE,
CO2_LCL = asymp.LCL,
CO2_UCL = asymp.UCL
) %>%
select(cow_id, CO2_lsmean, CO2_SE, CO2_LCL, CO2_UCL)
# Add treatment + visit counts
visits <- dat2 %>%
group_by(cow_id, grazing_treatment) %>%
summarise(n_visits = n(), .groups = "drop")
cow_lsmeans <- lsm_ch4 %>%
inner_join(lsm_co2, by = "cow_id") %>%
left_join(visits, by = "cow_id") %>%
arrange(grazing_treatment, cow_id)
# Optional precision weights
cow_lsmeans <- cow_lsmeans %>%
mutate(
weight_ch4_inv_se2 = 1 / (CH4_SE^2),
weight_co2_inv_se2 = 1 / (CO2_SE^2)
)
# Save
write_csv(cow_lsmeans, "cow_lsmeans_CH4_CO2_3SD.csv")
# Preview
cat("Number of cows:", nrow(cow_lsmeans), "\n")
## Number of cows: 37
print(head(cow_lsmeans, 12))
## cow_id CH4_lsmean CH4_SE CH4_LCL CH4_UCL CO2_lsmean CO2_SE CO2_LCL
## 1 258K 248.9888 9.103959 231.1454 266.8323 8956.142 163.4676 8635.751
## 2 264F 290.6588 9.063870 272.8939 308.4236 10765.731 162.7390 10446.768
## 3 266H 345.1719 9.909498 325.7496 364.5942 11351.175 175.4951 11007.211
## 4 286J 290.2081 9.512178 271.5645 308.8516 9988.413 169.9603 9655.296
## 5 290G 355.6625 10.129619 335.8088 375.5162 12211.914 179.4756 11860.149
## 6 306K 220.2244 10.765321 199.1248 241.3241 8339.988 189.0924 7969.374
## 7 312E 325.7233 9.892952 306.3334 345.1131 10900.323 175.6940 10555.969
## 8 314J 317.1428 8.796818 299.9013 334.3842 10474.481 158.8130 10163.213
## 9 322C 324.7291 11.695099 301.8072 347.6511 10863.710 201.5420 10468.695
## 10 322F 319.6655 9.785017 300.4872 338.8437 10764.357 174.8119 10421.732
## 11 338D 224.3099 22.130105 180.9357 267.6841 10350.566 373.4940 9618.531
## 12 338J 316.7353 9.357583 298.3948 335.0759 10490.753 166.9916 10163.455
## CO2_UCL grazing_treatment n_visits weight_ch4_inv_se2 weight_co2_inv_se2
## 1 9276.532 K1 121 0.012065335 3.742283e-05
## 2 11084.693 K1 122 0.012172300 3.775865e-05
## 3 11695.139 K1 96 0.010183491 3.246907e-05
## 4 10321.529 K1 109 0.011051980 3.461824e-05
## 5 12563.680 K1 92 0.009745716 3.104481e-05
## 6 8710.602 K1 79 0.008628714 2.796739e-05
## 7 11244.677 K1 98 0.010217583 3.239562e-05
## 8 10785.748 K1 132 0.012922567 3.964861e-05
## 9 11258.725 K1 61 0.007311259 2.461892e-05
## 10 11106.982 K1 104 0.010444239 3.272337e-05
## 11 11082.601 K1 16 0.002041893 7.168572e-06
## 12 10818.050 K1 111 0.011420172 3.586002e-05
#Plot result for LSMeans
library(readr)
library(dplyr)
library(ggplot2)
# --------------------------------
# Load LSMeans output
# --------------------------------
lsm <- read_csv("cow_lsmeans_CH4_CO2_3SD.csv", show_col_types = FALSE)
# --------------------------------
# Plot function (well-spaced)
# --------------------------------
plot_lsm_by_trt <- function(df, trt, y_col, lcl_col, ucl_col, fill_col, title_txt, ylab_txt) {
d <- df %>%
filter(grazing_treatment == trt) %>%
arrange(desc(.data[[y_col]])) %>%
mutate(
cow_label = paste0(cow_id, " (n=", n_visits, ")"),
cow_label = factor(cow_label, levels = rev(cow_label)),
mean_lbl = sprintf("%.1f", .data[[y_col]])
)
ggplot(d, aes(x = cow_label, y = .data[[y_col]])) +
geom_col(fill = fill_col, alpha = 0.9, width = 0.72) +
geom_errorbar(aes(ymin = .data[[lcl_col]], ymax = .data[[ucl_col]]),
width = 0.20, color = "black", linewidth = 0.5) +
geom_text(aes(y = .data[[ucl_col]] + 0.03 * diff(range(.data[[y_col]], na.rm = TRUE)),
label = mean_lbl),
hjust = 0, size = 3.1) +
coord_flip(clip = "off") +
scale_y_continuous(expand = expansion(mult = c(0.02, 0.20))) +
labs(
title = title_txt,
x = "Cow (visit count)",
y = ylab_txt
) +
theme_minimal(base_size = 12) +
theme(
panel.grid.major.y = element_blank(),
plot.margin = margin(10, 40, 10, 10)
)
}
# --------------------------------
# CH4 plots (same colors as before)
# --------------------------------
p_ch4_k1 <- plot_lsm_by_trt(
lsm, "K1",
y_col = "CH4_lsmean", lcl_col = "CH4_LCL", ucl_col = "CH4_UCL",
fill_col = "forestgreen",
title_txt = "K1 Cow-level Mean CH4 (LSMEANS, after 3SD)",
ylab_txt = "CH4 LSMEAN (g/day)"
)
p_ch4_k2 <- plot_lsm_by_trt(
lsm, "K2",
y_col = "CH4_lsmean", lcl_col = "CH4_LCL", ucl_col = "CH4_UCL",
fill_col = "steelblue",
title_txt = "K2 Cow-level Mean CH4 (LSMEANS, after 3SD)",
ylab_txt = "CH4 LSMEAN (g/day)"
)
# Show
p_ch4_k1

p_ch4_k2

# Save
ggsave("CH4_lsmeans_3SD_K1.png", p_ch4_k1, width = 8.5, height = 10.5, dpi = 320)
ggsave("CH4_lsmeans_3SD_K2.png", p_ch4_k2, width = 8.5, height = 10.5, dpi = 320)
# Calculating mean CH4 using the Aritmetic mean approach
library(readr)
library(dplyr)
# -----------------------------
# Load data
# -----------------------------
df <- read_csv("GreenFeed_Cleaned_K1K2.csv", show_col_types = FALSE)
# -----------------------------
# Clean + standardize
# -----------------------------
df2 <- df %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = toupper(trimws(grazing_treatment)),
CH4_g_d = as.numeric(CH4_g_d)
) %>%
filter(is.finite(CH4_g_d), grazing_treatment %in% c("K1", "K2"))
# -----------------------------
# Arithmetic mean CH4 per cow
# -----------------------------
cow_ch4_am <- df2 %>%
group_by(cow_id, grazing_treatment) %>%
summarise(
n_visits = n(),
CH4_mean_AM = mean(CH4_g_d, na.rm = TRUE),
CH4_sd = sd(CH4_g_d, na.rm = TRUE),
CH4_se = CH4_sd / sqrt(n_visits),
CH4_lcl_95 = CH4_mean_AM - qt(0.975, df = pmax(n_visits - 1, 1)) * CH4_se,
CH4_ucl_95 = CH4_mean_AM + qt(0.975, df = pmax(n_visits - 1, 1)) * CH4_se,
.groups = "drop"
) %>%
arrange(grazing_treatment, cow_id)
# -----------------------------
# Save and preview
# -----------------------------
write_csv(cow_ch4_am, "cow_CH4_arithmetic_mean.csv")
cat("Number of cows:", nrow(cow_ch4_am), "\n")
## Number of cows: 37
print(head(cow_ch4_am, 12))
## # A tibble: 12 × 8
## cow_id grazing_treatment n_visits CH4_mean_AM CH4_sd CH4_se CH4_lcl_95
## <chr> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 258K K1 122 249. 77.8 7.04 235.
## 2 264F K1 122 290. 69.3 6.27 277.
## 3 266H K1 97 351. 89.1 9.05 333.
## 4 286J K1 109 290. 88.0 8.43 273.
## 5 290G K1 93 361. 90.3 9.36 343.
## 6 306K K1 81 215. 62.0 6.89 202.
## 7 312E K1 98 330. 79.2 8.00 314.
## 8 314J K1 135 325. 83.5 7.19 311.
## 9 322C K1 61 326. 100. 12.8 300.
## 10 322F K1 106 325. 100. 9.75 305.
## 11 338D K1 16 229. 93.0 23.2 179.
## 12 338J K1 111 319. 69.3 6.58 306.
## # ℹ 1 more variable: CH4_ucl_95 <dbl>
# Comparing AM and LS Mean
library(readr)
library(dplyr)
# Load files
lsm <- read_csv("cow_lsmeans_CH4_CO2_37.csv", show_col_types = FALSE)
am <- read_csv("cow_CH4_arithmetic_mean.csv", show_col_types = FALSE)
# Merge CH4 metrics
cmp <- lsm %>%
select(cow_id, CH4_lsmean) %>%
inner_join(
am %>% select(cow_id, grazing_treatment, n_visits, CH4_mean_AM),
by = "cow_id"
) %>%
mutate(
diff = CH4_lsmean - CH4_mean_AM,
abs_diff = abs(diff),
rank_lsm = rank(-CH4_lsmean, ties.method = "average"),
rank_am = rank(-CH4_mean_AM, ties.method = "average"),
rank_shift = rank_lsm - rank_am,
abs_rank_shift = abs(rank_shift)
)
# Correlations
pear <- cor.test(cmp$CH4_lsmean, cmp$CH4_mean_AM, method = "pearson")
spear <- cor.test(cmp$rank_lsm, cmp$rank_am, method = "spearman", exact = FALSE)
# Bland-Altman style summary
mean_diff <- mean(cmp$diff, na.rm = TRUE)
sd_diff <- sd(cmp$diff, na.rm = TRUE)
loa_low <- mean_diff - 1.96 * sd_diff
loa_high <- mean_diff + 1.96 * sd_diff
# Top-k overlap
k <- 10
top_lsm <- cmp %>% arrange(desc(CH4_lsmean)) %>% slice(1:k) %>% pull(cow_id)
top_am <- cmp %>% arrange(desc(CH4_mean_AM)) %>% slice(1:k) %>% pull(cow_id)
top_overlap <- length(intersect(top_lsm, top_am))
# By-treatment averages
by_treatment <- cmp %>%
group_by(grazing_treatment) %>%
summarise(
CH4_lsmean_mean = mean(CH4_lsmean, na.rm = TRUE),
CH4_AM_mean = mean(CH4_mean_AM, na.rm = TRUE),
mean_diff = mean(diff, na.rm = TRUE),
.groups = "drop"
)
# Rank shift table
rank_shift_tbl <- cmp %>%
arrange(desc(abs_rank_shift), cow_id) %>%
select(
cow_id, grazing_treatment, n_visits,
CH4_lsmean, CH4_mean_AM, diff,
rank_lsm, rank_am, rank_shift, abs_rank_shift
)
# Summary table
summary_tbl <- tibble::tibble(
metric = c(
"n_cows",
"mean_CH4_lsmean",
"mean_CH4_AM",
"mean_diff_LSM_minus_AM",
"median_diff_LSM_minus_AM",
"min_diff",
"max_diff",
"mean_abs_diff",
"loa_low_95",
"loa_high_95",
"pearson_r",
"pearson_p",
"spearman_rho_rank",
"spearman_p_rank",
"top10_overlap",
"mean_abs_rank_shift",
"max_abs_rank_shift"
),
value = c(
nrow(cmp),
mean(cmp$CH4_lsmean, na.rm = TRUE),
mean(cmp$CH4_mean_AM, na.rm = TRUE),
mean_diff,
median(cmp$diff, na.rm = TRUE),
min(cmp$diff, na.rm = TRUE),
max(cmp$diff, na.rm = TRUE),
mean(cmp$abs_diff, na.rm = TRUE),
loa_low,
loa_high,
unname(pear$estimate),
pear$p.value,
unname(spear$estimate),
spear$p.value,
top_overlap,
mean(cmp$abs_rank_shift, na.rm = TRUE),
max(cmp$abs_rank_shift, na.rm = TRUE)
)
)
# Write outputs
write_csv(cmp, "CH4_LSM_vs_AM_comparison_by_cow.csv")
write_csv(rank_shift_tbl, "CH4_LSM_vs_AM_rank_shifts.csv")
write_csv(by_treatment, "CH4_LSM_vs_AM_by_treatment.csv")
write_csv(summary_tbl, "CH4_LSM_vs_AM_summary_metrics.csv")
# Print
print(summary_tbl, n = nrow(summary_tbl))
## # A tibble: 17 × 2
## metric value
## <chr> <dbl>
## 1 n_cows 3.7 e+ 1
## 2 mean_CH4_lsmean 3.16e+ 2
## 3 mean_CH4_AM 3.18e+ 2
## 4 mean_diff_LSM_minus_AM -2.09e+ 0
## 5 median_diff_LSM_minus_AM -1.93e+ 0
## 6 min_diff -9.81e+ 0
## 7 max_diff 2.56e+ 0
## 8 mean_abs_diff 2.50e+ 0
## 9 loa_low_95 -7.14e+ 0
## 10 loa_high_95 2.95e+ 0
## 11 pearson_r 9.98e- 1
## 12 pearson_p 2.95e-43
## 13 spearman_rho_rank 9.94e- 1
## 14 spearman_p_rank 1.23e-35
## 15 top10_overlap 1 e+ 1
## 16 mean_abs_rank_shift 8.11e- 1
## 17 max_abs_rank_shift 3 e+ 0
print(by_treatment)
## # A tibble: 2 × 4
## grazing_treatment CH4_lsmean_mean CH4_AM_mean mean_diff
## <chr> <dbl> <dbl> <dbl>
## 1 K1 300. 303. -3.00
## 2 K2 335. 336. -1.02
print(head(rank_shift_tbl, 15))
## # A tibble: 15 × 10
## cow_id grazing_treatment n_visits CH4_lsmean CH4_mean_AM diff rank_lsm
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 442B K2 45 306. 304. 2.34 24
## 2 264F K1 122 290. 290. 0.740 29
## 3 322F K1 106 318. 325. -6.30 20
## 4 326G K2 100 323. 323. -0.0713 17
## 5 364B K1 78 338. 347. -8.95 10
## 6 400G K2 67 319. 318. 0.184 19
## 7 404E K2 91 330. 330. 0.470 12
## 8 252H K2 134 381. 382. -1.27 2
## 9 260K K2 127 306. 308. -1.49 25
## 10 286J K1 109 290. 290. -0.381 31
## 11 312E K1 98 326. 330. -3.55 14
## 12 312F K2 150 344. 347. -2.55 8
## 13 314J K1 135 322. 325. -2.71 18
## 14 332H K2 101 340. 344. -3.90 9
## 15 338J K1 111 316. 319. -2.58 21
## # ℹ 3 more variables: rank_am <dbl>, rank_shift <dbl>, abs_rank_shift <dbl>
#save visit summary per day:
library(readr)
library(dplyr)
library(openxlsx)
# Load after-3SD enriched data
dat <- read_csv("analysis_3SD_enriched.csv", show_col_types = FALSE) %>%
mutate(
grazing_treatment = toupper(trimws(grazing_treatment)),
start_time = as.numeric(start_time),
visit_date = as.Date("1899-12-30") + floor(start_time)
) %>%
filter(grazing_treatment %in% c("K1", "K2"))
# Visits per day by treatment
visits_day <- dat %>%
group_by(visit_date, grazing_treatment) %>%
summarise(n_visits = n(), .groups = "drop") %>%
arrange(visit_date, grazing_treatment)
# Summary table
visits_summary <- visits_day %>%
group_by(grazing_treatment) %>%
summarise(
n_days = n(),
mean_visits_per_day = mean(n_visits, na.rm = TRUE),
sd_visits_per_day = sd(n_visits, na.rm = TRUE),
min_visits_per_day = min(n_visits, na.rm = TRUE),
max_visits_per_day = max(n_visits, na.rm = TRUE),
.groups = "drop"
)
# Optional wide table (date rows, K1/K2 columns)
visits_wide <- visits_day %>%
tidyr::pivot_wider(
names_from = grazing_treatment,
values_from = n_visits,
values_fill = 0
) %>%
arrange(visit_date)
# Save to Excel (multiple sheets)
out_xlsx <- "Visits_Per_Day_K1_K2_After3SD.xlsx"
wb <- createWorkbook()
addWorksheet(wb, "Visits_By_Day_Long")
writeData(wb, "Visits_By_Day_Long", visits_day)
addWorksheet(wb, "Visits_By_Day_Wide")
writeData(wb, "Visits_By_Day_Wide", visits_wide)
addWorksheet(wb, "Summary")
writeData(wb, "Summary", visits_summary)
saveWorkbook(wb, out_xlsx, overwrite = TRUE)
cat("Saved:", out_xlsx, "\n")
## Saved: Visits_Per_Day_K1_K2_After3SD.xlsx
print(visits_summary)
## # A tibble: 2 × 6
## grazing_treatment n_days mean_visits_per_day sd_visits_per_day
## <chr> <int> <dbl> <dbl>
## 1 K1 64 27.5 12.2
## 2 K2 75 21.3 10.3
## # ℹ 2 more variables: min_visits_per_day <int>, max_visits_per_day <int>
# Create 14 days period
library(readr)
library(dplyr)
library(writexl)
# Load after-3SD dataset
dat <- read_csv("analysis_3SD_enriched.csv", show_col_types = FALSE) %>%
mutate(
start_time = as.numeric(start_time),
visit_date = as.Date("1899-12-30") + floor(start_time),
grazing_treatment = toupper(trimws(grazing_treatment))
)
# Build ordered day index
day_tbl <- dat %>%
distinct(visit_date) %>%
arrange(visit_date) %>%
mutate(day_index = row_number())
# Parameters (your chosen rule)
drop_first_days <- 3
drop_last_days <- 2
period_len <- 14
n_days_total <- nrow(day_tbl)
# Keep middle block
day_tbl2 <- day_tbl %>%
filter(
day_index > drop_first_days,
day_index <= (n_days_total - drop_last_days)
) %>%
mutate(
kept_day_index = row_number(),
period_id = ((kept_day_index - 1) %/% period_len) + 1
)
# Check full periods only (optional strict filter)
max_full_period <- floor(nrow(day_tbl2) / period_len)
day_tbl2 <- day_tbl2 %>% filter(period_id <= max_full_period)
# Join period labels back to observations
dat_period <- dat %>%
inner_join(day_tbl2 %>% select(visit_date, period_id), by = "visit_date")
# QA tables
period_day_counts <- day_tbl2 %>%
group_by(period_id) %>%
summarise(
n_days = n(),
start_date = min(visit_date),
end_date = max(visit_date),
.groups = "drop"
)
period_visit_counts <- dat_period %>%
group_by(period_id, grazing_treatment) %>%
summarise(
n_visits = n(),
n_cows = n_distinct(cow_id),
.groups = "drop"
) %>%
arrange(period_id, grazing_treatment)
cat("Total days:", n_days_total, "\n")
## Total days: 75
cat("Dropped first:", drop_first_days, " | Dropped last:", drop_last_days, "\n")
## Dropped first: 3 | Dropped last: 2
cat("Days retained:", nrow(day_tbl2), "\n")
## Days retained: 70
cat("Periods retained:", max(day_tbl2$period_id), "\n\n")
## Periods retained: 5
print(period_day_counts)
## # A tibble: 5 × 4
## period_id n_days start_date end_date
## <dbl> <int> <date> <date>
## 1 1 14 2025-06-30 2025-07-13
## 2 2 14 2025-07-14 2025-07-27
## 3 3 14 2025-07-28 2025-08-10
## 4 4 14 2025-08-11 2025-08-24
## 5 5 14 2025-08-25 2025-09-07
print(period_visit_counts)
## # A tibble: 10 × 4
## period_id grazing_treatment n_visits n_cows
## <dbl> <chr> <int> <int>
## 1 1 K1 381 18
## 2 1 K2 331 14
## 3 2 K1 177 18
## 4 2 K2 235 15
## 5 3 K1 267 18
## 6 3 K2 331 16
## 7 4 K1 528 19
## 8 4 K2 323 17
## 9 5 K1 382 20
## 10 5 K2 297 17
# Save outputs
write_csv(dat_period, "analysis_3SD_with_14day_periods.csv")
write_csv(period_day_counts, "period_day_counts_14day.csv")
write_csv(period_visit_counts, "period_visit_counts_14day.csv")
write_xlsx(
list(
data_with_periods = dat_period,
period_day_counts = period_day_counts,
period_visit_counts = period_visit_counts
),
"analysis_3SD_with_14day_periods.xlsx"
)
# Create 4-hour time bins
library(readr)
library(dplyr)
# Load period-tagged dataset (from previous step)
dat <- read_csv("analysis_3SD_with_14day_periods.csv", show_col_types = FALSE)
# Create 6 time bins of 4 hours each using hour_of_the_day
dat_bins <- dat %>%
mutate(
hour_of_the_day = as.numeric(hour_of_the_day),
hour_int = floor(hour_of_the_day),
time_bin_4hr = case_when(
hour_int >= 0 & hour_int < 4 ~ "00:00-04:00",
hour_int >= 4 & hour_int < 8 ~ "04:00-08:00",
hour_int >= 8 & hour_int < 12 ~ "08:00-12:00",
hour_int >= 12 & hour_int < 16 ~ "12:00-16:00",
hour_int >= 16 & hour_int < 20 ~ "16:00-20:00",
hour_int >= 20 & hour_int <= 23 ~ "20:00-24:00",
TRUE ~ NA_character_
),
time_bin_4hr = factor(
time_bin_4hr,
levels = c(
"00:00-04:00", "04:00-08:00", "08:00-12:00",
"12:00-16:00", "16:00-20:00", "20:00-24:00"
)
)
)
# Quick checks
bin_counts <- dat_bins %>%
count(time_bin_4hr, sort = FALSE)
bin_counts_by_trt <- dat_bins %>%
mutate(grazing_treatment = toupper(trimws(grazing_treatment))) %>%
count(grazing_treatment, time_bin_4hr, sort = FALSE)
print(bin_counts)
## # A tibble: 6 × 2
## time_bin_4hr n
## <fct> <int>
## 1 00:00-04:00 69
## 2 04:00-08:00 648
## 3 08:00-12:00 717
## 4 12:00-16:00 749
## 5 16:00-20:00 794
## 6 20:00-24:00 275
print(bin_counts_by_trt)
## # A tibble: 12 × 3
## grazing_treatment time_bin_4hr n
## <chr> <fct> <int>
## 1 K1 00:00-04:00 13
## 2 K1 04:00-08:00 425
## 3 K1 08:00-12:00 309
## 4 K1 12:00-16:00 379
## 5 K1 16:00-20:00 470
## 6 K1 20:00-24:00 139
## 7 K2 00:00-04:00 56
## 8 K2 04:00-08:00 223
## 9 K2 08:00-12:00 408
## 10 K2 12:00-16:00 370
## 11 K2 16:00-20:00 324
## 12 K2 20:00-24:00 136
# Save
write_csv(dat_bins, "analysis_3SD_with_14day_periods_4hr_bins.csv")
write_csv(bin_counts, "time_bin_4hr_counts.csv")
write_csv(bin_counts_by_trt, "time_bin_4hr_counts_by_treatment.csv")
# Filter number of visits more than or equal to 20 and time bin is at least 5 of 6
library(readr)
library(dplyr)
library(writexl)
# Load data with periods + 4hr bins
dat <- read_csv("analysis_3SD_with_14day_periods_4hr_bins.csv", show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = toupper(trimws(grazing_treatment)),
period_id = as.integer(period_id),
time_bin_4hr = as.character(time_bin_4hr)
)
# Criteria:
# 1) >= 20 visits per cow per period
# 2) >= 5 of 6 time bins represented per cow per period
cow_period_stats <- dat %>%
filter(!is.na(period_id), !is.na(time_bin_4hr)) %>%
group_by(cow_id, grazing_treatment, period_id) %>%
summarise(
n_visits = n(),
n_bins_covered = n_distinct(time_bin_4hr),
bins_covered = paste(sort(unique(time_bin_4hr)), collapse = "; "),
.groups = "drop"
) %>%
mutate(
pass_visits = n_visits >= 20,
pass_bins = n_bins_covered >= 5,
pass_all = pass_visits & pass_bins,
fail_reason = case_when(
!pass_visits & !pass_bins ~ "Fail visits + bins",
!pass_visits & pass_bins ~ "Fail visits only",
pass_visits & !pass_bins ~ "Fail bins only",
TRUE ~ "Pass"
)
)
# Failed combinations
failed_cow_periods <- cow_period_stats %>%
filter(!pass_all) %>%
arrange(period_id, grazing_treatment, cow_id)
# Passed combinations
passed_cow_periods <- cow_period_stats %>%
filter(pass_all) %>%
arrange(period_id, grazing_treatment, cow_id)
# Keep only observations from passed cow-periods
dat_passed <- dat %>%
inner_join(
passed_cow_periods %>% select(cow_id, grazing_treatment, period_id),
by = c("cow_id", "grazing_treatment", "period_id")
)
# Summary by period/treatment
summary_tbl <- cow_period_stats %>%
group_by(period_id, grazing_treatment) %>%
summarise(
n_cow_period = n(),
n_pass = sum(pass_all),
n_fail = sum(!pass_all),
n_fail_visits = sum(!pass_visits),
n_fail_bins = sum(!pass_bins),
.groups = "drop"
) %>%
arrange(period_id, grazing_treatment)
# Print key results
cat("Total cow-period combinations:", nrow(cow_period_stats), "\n")
## Total cow-period combinations: 172
cat("Passed:", nrow(passed_cow_periods), "\n")
## Passed: 69
cat("Failed:", nrow(failed_cow_periods), "\n\n")
## Failed: 103
print(summary_tbl, n = nrow(summary_tbl))
## # A tibble: 10 × 7
## period_id grazing_treatment n_cow_period n_pass n_fail n_fail_visits
## <int> <chr> <int> <int> <int> <int>
## 1 1 K1 18 10 8 8
## 2 1 K2 14 9 5 5
## 3 2 K1 18 1 17 17
## 4 2 K2 15 3 12 12
## 5 3 K1 18 2 16 16
## 6 3 K2 16 9 7 6
## 7 4 K1 19 16 3 2
## 8 4 K2 17 6 11 8
## 9 5 K1 20 7 13 9
## 10 5 K2 17 6 11 11
## # ℹ 1 more variable: n_fail_bins <int>
print(failed_cow_periods, n = nrow(failed_cow_periods))
## # A tibble: 103 × 10
## cow_id grazing_treatment period_id n_visits n_bins_covered bins_covered
## <chr> <chr> <int> <int> <int> <chr>
## 1 306K K1 1 18 4 04:00-08:00; 12:…
## 2 322C K1 1 13 5 04:00-08:00; 08:…
## 3 364B K1 1 19 5 04:00-08:00; 08:…
## 4 370F K1 1 18 5 04:00-08:00; 08:…
## 5 410C K1 1 15 5 04:00-08:00; 08:…
## 6 426D K1 1 18 5 04:00-08:00; 08:…
## 7 436E K1 1 17 5 04:00-08:00; 08:…
## 8 454C K1 1 14 5 04:00-08:00; 08:…
## 9 266D K2 1 17 4 04:00-08:00; 08:…
## 10 400G K2 1 8 5 00:00-04:00; 04:…
## 11 404E K2 1 9 5 00:00-04:00; 04:…
## 12 442B K2 1 5 3 08:00-12:00; 12:…
## 13 446K K2 1 19 5 00:00-04:00; 04:…
## 14 264F K1 2 14 4 04:00-08:00; 12:…
## 15 266H K1 2 7 4 04:00-08:00; 08:…
## 16 286J K1 2 8 5 04:00-08:00; 08:…
## 17 290G K1 2 5 4 04:00-08:00; 08:…
## 18 306K K1 2 10 4 04:00-08:00; 12:…
## 19 312E K1 2 9 4 04:00-08:00; 08:…
## 20 314J K1 2 17 4 04:00-08:00; 12:…
## 21 322C K1 2 2 1 12:00-16:00
## 22 322F K1 2 10 5 04:00-08:00; 08:…
## 23 338J K1 2 10 5 04:00-08:00; 08:…
## 24 364B K1 2 3 2 12:00-16:00; 16:…
## 25 370F K1 2 8 4 04:00-08:00; 08:…
## 26 370H K1 2 15 5 04:00-08:00; 08:…
## 27 410C K1 2 11 6 00:00-04:00; 04:…
## 28 426D K1 2 8 5 00:00-04:00; 04:…
## 29 436E K1 2 9 5 04:00-08:00; 08:…
## 30 454C K1 2 9 4 08:00-12:00; 12:…
## 31 252H K2 2 18 6 00:00-04:00; 04:…
## 32 260K K2 2 19 4 04:00-08:00; 08:…
## 33 266D K2 2 15 5 04:00-08:00; 08:…
## 34 270G K2 2 18 6 00:00-04:00; 04:…
## 35 296G K2 2 14 5 04:00-08:00; 08:…
## 36 296H K2 2 17 5 04:00-08:00; 08:…
## 37 326G K2 2 15 5 04:00-08:00; 08:…
## 38 332H K2 2 12 5 04:00-08:00; 08:…
## 39 368J K2 2 15 5 00:00-04:00; 04:…
## 40 400G K2 2 7 3 12:00-16:00; 16:…
## 41 442B K2 2 6 4 08:00-12:00; 12:…
## 42 446K K2 2 12 4 00:00-04:00; 04:…
## 43 258K K1 3 19 5 04:00-08:00; 08:…
## 44 266H K1 3 16 5 04:00-08:00; 08:…
## 45 286J K1 3 19 5 04:00-08:00; 08:…
## 46 290G K1 3 14 5 04:00-08:00; 08:…
## 47 306K K1 3 13 3 04:00-08:00; 12:…
## 48 312E K1 3 15 4 04:00-08:00; 08:…
## 49 314J K1 3 18 5 04:00-08:00; 08:…
## 50 322C K1 3 8 5 04:00-08:00; 08:…
## 51 322F K1 3 18 5 04:00-08:00; 08:…
## 52 338J K1 3 14 4 04:00-08:00; 08:…
## 53 364B K1 3 8 4 04:00-08:00; 08:…
## 54 370F K1 3 16 5 04:00-08:00; 08:…
## 55 410C K1 3 12 4 04:00-08:00; 08:…
## 56 426D K1 3 13 4 04:00-08:00; 08:…
## 57 436E K1 3 11 5 04:00-08:00; 08:…
## 58 454C K1 3 12 5 04:00-08:00; 08:…
## 59 260C K2 3 17 4 04:00-08:00; 08:…
## 60 266D K2 3 14 5 04:00-08:00; 08:…
## 61 332H K2 3 17 5 04:00-08:00; 08:…
## 62 400G K2 3 16 5 00:00-04:00; 04:…
## 63 404E K2 3 21 4 04:00-08:00; 08:…
## 64 442B K2 3 11 5 04:00-08:00; 08:…
## 65 446K K2 3 17 5 04:00-08:00; 08:…
## 66 306K K1 4 20 4 04:00-08:00; 08:…
## 67 338D K1 4 7 3 08:00-12:00; 12:…
## 68 410C K1 4 19 5 04:00-08:00; 08:…
## 69 266D K2 4 17 4 04:00-08:00; 08:…
## 70 296G K2 4 18 5 04:00-08:00; 08:…
## 71 296H K2 4 19 4 04:00-08:00; 08:…
## 72 326G K2 4 20 4 04:00-08:00; 08:…
## 73 332H K2 4 18 5 04:00-08:00; 08:…
## 74 368J K2 4 25 4 04:00-08:00; 08:…
## 75 398G K2 4 19 4 04:00-08:00; 08:…
## 76 400G K2 4 17 5 04:00-08:00; 08:…
## 77 422J K2 4 2 2 12:00-16:00; 16:…
## 78 442B K2 4 15 4 04:00-08:00; 08:…
## 79 446K K2 4 23 3 08:00-12:00; 12:…
## 80 264F K1 5 24 4 04:00-08:00; 08:…
## 81 266H K1 5 22 4 04:00-08:00; 08:…
## 82 306K K1 5 17 4 04:00-08:00; 08:…
## 83 314J K1 5 29 4 04:00-08:00; 08:…
## 84 322C K1 5 17 5 04:00-08:00; 08:…
## 85 322F K1 5 14 5 04:00-08:00; 08:…
## 86 338D K1 5 9 4 08:00-12:00; 12:…
## 87 364B K1 5 16 5 00:00-04:00; 04:…
## 88 370H K1 5 11 5 04:00-08:00; 08:…
## 89 374E K1 5 16 5 04:00-08:00; 08:…
## 90 426D K1 5 16 5 04:00-08:00; 08:…
## 91 436E K1 5 20 4 04:00-08:00; 08:…
## 92 454C K1 5 12 4 08:00-12:00; 12:…
## 93 266D K2 5 13 4 04:00-08:00; 08:…
## 94 296H K2 5 17 5 04:00-08:00; 08:…
## 95 326G K2 5 18 5 04:00-08:00; 08:…
## 96 332H K2 5 18 5 04:00-08:00; 08:…
## 97 368J K2 5 17 5 04:00-08:00; 08:…
## 98 398G K2 5 13 4 04:00-08:00; 08:…
## 99 400G K2 5 17 4 04:00-08:00; 08:…
## 100 404E K2 5 17 3 08:00-12:00; 12:…
## 101 422J K2 5 12 3 08:00-12:00; 12:…
## 102 442B K2 5 7 4 04:00-08:00; 08:…
## 103 446K K2 5 17 4 08:00-12:00; 12:…
## # ℹ 4 more variables: pass_visits <lgl>, pass_bins <lgl>, pass_all <lgl>,
## # fail_reason <chr>
# Save CSV outputs
write_csv(cow_period_stats, "cow_period_criteria_stats_14day.csv")
write_csv(failed_cow_periods, "failed_cow_periods_14day.csv")
write_csv(passed_cow_periods, "passed_cow_periods_14day.csv")
write_csv(dat_passed, "analysis_3SD_14day_passed_only.csv")
write_csv(summary_tbl, "cow_period_criteria_summary_14day.csv")
# Save Excel workbook
write_xlsx(
list(
cow_period_stats = cow_period_stats,
failed_cow_periods = failed_cow_periods,
passed_cow_periods = passed_cow_periods,
summary = summary_tbl,
data_passed_only = dat_passed
),
"TOD_14day_filtering_results.xlsx"
)
#Cow loss to filtering
library(readr)
library(dplyr)
# Files
before <- read_csv("analysis_3SD_with_14day_periods_4hr_bins.csv", show_col_types = FALSE)
after <- read_csv("analysis_3SD_14day_passed_only.csv", show_col_types = FALSE)
cp <- read_csv("cow_period_criteria_stats_14day.csv", show_col_types = FALSE)
# Counts
obs_before <- nrow(before)
obs_after <- nrow(after)
cows_before <- n_distinct(before$cow_id)
cows_after <- n_distinct(after$cow_id)
cowperiod_before <- nrow(cp)
cowperiod_after <- sum(cp$pass_all, na.rm = TRUE)
# Percent loss/retained
summary_loss <- tibble(
group = c("Observations", "Cows", "Cow-periods"),
n_before = c(obs_before, cows_before, cowperiod_before),
n_after = c(obs_after, cows_after, cowperiod_after)
) %>%
mutate(
n_lost = n_before - n_after,
pct_lost = 100 * n_lost / n_before,
pct_retained = 100 * n_after / n_before
)
print(summary_loss)
## # A tibble: 3 × 6
## group n_before n_after n_lost pct_lost pct_retained
## <chr> <int> <int> <int> <dbl> <dbl>
## 1 Observations 3252 1772 1480 45.5 54.5
## 2 Cows 37 29 8 21.6 78.4
## 3 Cow-periods 172 69 103 59.9 40.1
# Optional: save
write_csv(summary_loss, "loss_summary_20visits_5bins.csv")
# cow ids with <20 visits per 14-day period
library(readr)
library(dplyr)
# -----------------------------
# Load data
# -----------------------------
dat <- read_csv("analysis_3SD_with_14day_periods_4hr_bins.csv", show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = toupper(trimws(grazing_treatment)),
period_id = as.integer(period_id)
) %>%
filter(!is.na(period_id))
# -----------------------------
# Visits per cow per period
# -----------------------------
cow_period_visits <- dat %>%
group_by(period_id, grazing_treatment, cow_id) %>%
summarise(n_visits = n(), .groups = "drop")
# Cows failing criterion: <20 visits
fail_lt20 <- cow_period_visits %>%
filter(n_visits < 20) %>%
arrange(period_id, grazing_treatment, cow_id)
# -----------------------------
# Counts
# -----------------------------
# By period
fail_lt20_counts <- fail_lt20 %>%
group_by(period_id) %>%
summarise(
n_cows_lt20 = n_distinct(cow_id),
n_cow_period_lt20 = n(),
.groups = "drop"
) %>%
arrange(period_id)
# By period x treatment
fail_lt20_counts_trt <- fail_lt20 %>%
group_by(period_id, grazing_treatment) %>%
summarise(
n_cows_lt20 = n_distinct(cow_id),
n_cow_period_lt20 = n(),
.groups = "drop"
) %>%
arrange(period_id, grazing_treatment)
# -----------------------------
# Print
# -----------------------------
cat("=== Cows with <20 visits by period ===\n")
## === Cows with <20 visits by period ===
if (nrow(fail_lt20_counts) == 0) {
cat("No cows had <20 visits in any 14-day period.\n")
} else {
print(fail_lt20_counts, n = nrow(fail_lt20_counts))
}
## # A tibble: 5 × 3
## period_id n_cows_lt20 n_cow_period_lt20
## <int> <int> <int>
## 1 1 13 13
## 2 2 29 29
## 3 3 22 22
## 4 4 10 10
## 5 5 20 20
cat("\n=== Cows with <20 visits by period x treatment ===\n")
##
## === Cows with <20 visits by period x treatment ===
if (nrow(fail_lt20_counts_trt) == 0) {
cat("No cows had <20 visits in any 14-day period x treatment.\n")
} else {
print(fail_lt20_counts_trt, n = nrow(fail_lt20_counts_trt))
}
## # A tibble: 10 × 4
## period_id grazing_treatment n_cows_lt20 n_cow_period_lt20
## <int> <chr> <int> <int>
## 1 1 K1 8 8
## 2 1 K2 5 5
## 3 2 K1 17 17
## 4 2 K2 12 12
## 5 3 K1 16 16
## 6 3 K2 6 6
## 7 4 K1 2 2
## 8 4 K2 8 8
## 9 5 K1 9 9
## 10 5 K2 11 11
cat("\n=== IDs failing <20 visits (cow-period level) ===\n")
##
## === IDs failing <20 visits (cow-period level) ===
if (nrow(fail_lt20) == 0) {
cat("No failing cow-period rows for <20 visits.\n")
} else {
print(fail_lt20, n = nrow(fail_lt20))
}
## # A tibble: 94 × 4
## period_id grazing_treatment cow_id n_visits
## <int> <chr> <chr> <int>
## 1 1 K1 306K 18
## 2 1 K1 322C 13
## 3 1 K1 364B 19
## 4 1 K1 370F 18
## 5 1 K1 410C 15
## 6 1 K1 426D 18
## 7 1 K1 436E 17
## 8 1 K1 454C 14
## 9 1 K2 266D 17
## 10 1 K2 400G 8
## 11 1 K2 404E 9
## 12 1 K2 442B 5
## 13 1 K2 446K 19
## 14 2 K1 264F 14
## 15 2 K1 266H 7
## 16 2 K1 286J 8
## 17 2 K1 290G 5
## 18 2 K1 306K 10
## 19 2 K1 312E 9
## 20 2 K1 314J 17
## 21 2 K1 322C 2
## 22 2 K1 322F 10
## 23 2 K1 338J 10
## 24 2 K1 364B 3
## 25 2 K1 370F 8
## 26 2 K1 370H 15
## 27 2 K1 410C 11
## 28 2 K1 426D 8
## 29 2 K1 436E 9
## 30 2 K1 454C 9
## 31 2 K2 252H 18
## 32 2 K2 260K 19
## 33 2 K2 266D 15
## 34 2 K2 270G 18
## 35 2 K2 296G 14
## 36 2 K2 296H 17
## 37 2 K2 326G 15
## 38 2 K2 332H 12
## 39 2 K2 368J 15
## 40 2 K2 400G 7
## 41 2 K2 442B 6
## 42 2 K2 446K 12
## 43 3 K1 258K 19
## 44 3 K1 266H 16
## 45 3 K1 286J 19
## 46 3 K1 290G 14
## 47 3 K1 306K 13
## 48 3 K1 312E 15
## 49 3 K1 314J 18
## 50 3 K1 322C 8
## 51 3 K1 322F 18
## 52 3 K1 338J 14
## 53 3 K1 364B 8
## 54 3 K1 370F 16
## 55 3 K1 410C 12
## 56 3 K1 426D 13
## 57 3 K1 436E 11
## 58 3 K1 454C 12
## 59 3 K2 260C 17
## 60 3 K2 266D 14
## 61 3 K2 332H 17
## 62 3 K2 400G 16
## 63 3 K2 442B 11
## 64 3 K2 446K 17
## 65 4 K1 338D 7
## 66 4 K1 410C 19
## 67 4 K2 266D 17
## 68 4 K2 296G 18
## 69 4 K2 296H 19
## 70 4 K2 332H 18
## 71 4 K2 398G 19
## 72 4 K2 400G 17
## 73 4 K2 422J 2
## 74 4 K2 442B 15
## 75 5 K1 306K 17
## 76 5 K1 322C 17
## 77 5 K1 322F 14
## 78 5 K1 338D 9
## 79 5 K1 364B 16
## 80 5 K1 370H 11
## 81 5 K1 374E 16
## 82 5 K1 426D 16
## 83 5 K1 454C 12
## 84 5 K2 266D 13
## 85 5 K2 296H 17
## 86 5 K2 326G 18
## 87 5 K2 332H 18
## 88 5 K2 368J 17
## 89 5 K2 398G 13
## 90 5 K2 400G 17
## 91 5 K2 404E 17
## 92 5 K2 422J 12
## 93 5 K2 442B 7
## 94 5 K2 446K 17
# -----------------------------
# Save outputs
# -----------------------------
write_csv(cow_period_visits, "cow_period_visits_14day.csv")
write_csv(fail_lt20, "failed_lt20_visits_cow_period_14day.csv")
write_csv(fail_lt20_counts, "failed_lt20_visits_counts_by_period_14day.csv")
write_csv(fail_lt20_counts_trt, "failed_lt20_visits_counts_by_period_treatment_14day.csv")
# Check how many cows qualify for each period:
library(readr)
library(dplyr)
library(tidyr)
library(writexl)
# Load data with period + 4hr bins
dat <- read_csv("analysis_3SD_with_14day_periods_4hr_bins.csv", show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = toupper(trimws(grazing_treatment)),
period_id = as.integer(period_id),
time_bin_4hr = as.character(time_bin_4hr)
) %>%
filter(!is.na(period_id), !is.na(time_bin_4hr), grazing_treatment %in% c("K1", "K2"))
# Per cow x period stats
cow_period_stats <- dat %>%
group_by(period_id, grazing_treatment, cow_id) %>%
summarise(
n_visits = n(),
n_bins_covered = n_distinct(time_bin_4hr),
.groups = "drop"
) %>%
mutate(
pass_visits = n_visits >= 20,
pass_bins = n_bins_covered >= 5,
pass_all = pass_visits & pass_bins
)
# Qualified cows per period x treatment
qualified_by_period_trt <- cow_period_stats %>%
filter(pass_all) %>%
group_by(period_id, grazing_treatment) %>%
summarise(
n_qualified_cows = n_distinct(cow_id),
qualified_cow_ids = paste(sort(unique(cow_id)), collapse = ", "),
.groups = "drop"
) %>%
arrange(period_id, grazing_treatment)
# Also show totals evaluated per period x treatment
evaluated_by_period_trt <- cow_period_stats %>%
group_by(period_id, grazing_treatment) %>%
summarise(
n_cows_evaluated = n_distinct(cow_id),
.groups = "drop"
)
summary_table <- evaluated_by_period_trt %>%
left_join(qualified_by_period_trt, by = c("period_id", "grazing_treatment")) %>%
mutate(
n_qualified_cows = ifelse(is.na(n_qualified_cows), 0L, n_qualified_cows),
qualified_cow_ids = ifelse(is.na(qualified_cow_ids), "", qualified_cow_ids),
pct_qualified = round(100 * n_qualified_cows / n_cows_evaluated, 1)
) %>%
arrange(period_id, grazing_treatment)
# Optional wide count table
summary_wide <- summary_table %>%
select(period_id, grazing_treatment, n_qualified_cows) %>%
pivot_wider(
names_from = grazing_treatment,
values_from = n_qualified_cows,
values_fill = 0,
names_prefix = "qualified_"
) %>%
arrange(period_id)
cat("=== Qualified cows per period by treatment (5/6 bins + >=20 visits) ===\n")
## === Qualified cows per period by treatment (5/6 bins + >=20 visits) ===
print(summary_table, n = nrow(summary_table))
## # A tibble: 10 × 6
## period_id grazing_treatment n_cows_evaluated n_qualified_cows
## <int> <chr> <int> <int>
## 1 1 K1 18 10
## 2 1 K2 14 9
## 3 2 K1 18 1
## 4 2 K2 15 3
## 5 3 K1 18 2
## 6 3 K2 16 9
## 7 4 K1 19 16
## 8 4 K2 17 6
## 9 5 K1 20 7
## 10 5 K2 17 6
## # ℹ 2 more variables: qualified_cow_ids <chr>, pct_qualified <dbl>
cat("\n=== Wide count table ===\n")
##
## === Wide count table ===
print(summary_wide, n = nrow(summary_wide))
## # A tibble: 5 × 3
## period_id qualified_K1 qualified_K2
## <int> <int> <int>
## 1 1 10 9
## 2 2 1 3
## 3 3 2 9
## 4 4 16 6
## 5 5 7 6
# Save outputs
write_csv(cow_period_stats, "cow_period_stats_criteria_14day.csv")
write_csv(summary_table, "qualified_cows_per_period_treatment_14day.csv")
write_csv(summary_wide, "qualified_cows_per_period_treatment_14day_wide.csv")
write_xlsx(
list(
cow_period_stats = cow_period_stats,
qualified_summary = summary_table,
qualified_counts_wide = summary_wide
),
"qualified_cows_per_period_treatment_14day.xlsx"
)
#Check why cows failed
library(readr)
library(dplyr)
library(tidyr)
dat <- read_csv("analysis_3SD_with_14day_periods_4hr_bins.csv", show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = toupper(trimws(grazing_treatment)),
period_id = as.integer(period_id),
time_bin_4hr = as.character(time_bin_4hr)
) %>%
filter(!is.na(period_id), !is.na(time_bin_4hr), grazing_treatment %in% c("K1","K2"))
cow_period <- dat %>%
group_by(period_id, grazing_treatment, cow_id) %>%
summarise(
n_visits = n(),
n_bins = n_distinct(time_bin_4hr),
.groups = "drop"
)
# 1) Why cows fail under current rule (>=20 visits and >=5 bins)
fail_diag <- cow_period %>%
mutate(
fail_visits = n_visits < 20,
fail_bins = n_bins < 5,
fail_type = case_when(
fail_visits & fail_bins ~ "both",
fail_visits ~ "visits_only",
fail_bins ~ "bins_only",
TRUE ~ "pass"
)
) %>%
group_by(period_id, grazing_treatment, fail_type) %>%
summarise(n_cows = n(), .groups = "drop") %>%
arrange(period_id, grazing_treatment, fail_type)
print(fail_diag, n = nrow(fail_diag))
## # A tibble: 34 × 4
## period_id grazing_treatment fail_type n_cows
## <int> <chr> <chr> <int>
## 1 1 K1 both 1
## 2 1 K1 pass 10
## 3 1 K1 visits_only 7
## 4 1 K2 both 2
## 5 1 K2 pass 9
## 6 1 K2 visits_only 3
## 7 2 K1 both 10
## 8 2 K1 pass 1
## 9 2 K1 visits_only 7
## 10 2 K2 both 4
## 11 2 K2 pass 3
## 12 2 K2 visits_only 8
## 13 3 K1 both 6
## 14 3 K1 pass 2
## 15 3 K1 visits_only 10
## 16 3 K2 bins_only 1
## 17 3 K2 both 1
## 18 3 K2 pass 9
## 19 3 K2 visits_only 5
## 20 4 K1 bins_only 1
## 21 4 K1 both 1
## 22 4 K1 pass 16
## 23 4 K1 visits_only 1
## 24 4 K2 bins_only 3
## 25 4 K2 both 5
## 26 4 K2 pass 6
## 27 4 K2 visits_only 3
## 28 5 K1 bins_only 4
## 29 5 K1 both 3
## 30 5 K1 pass 7
## 31 5 K1 visits_only 6
## 32 5 K2 both 7
## 33 5 K2 pass 6
## 34 5 K2 visits_only 4
write_csv(fail_diag, "failure_reason_by_period_treatment.csv")
# 2) Sensitivity grid to find workable rule
visit_thresholds <- c(20, 18, 15)
bin_thresholds <- c(5, 4)
sens <- tidyr::crossing(min_visits = visit_thresholds, min_bins = bin_thresholds) %>%
rowwise() %>%
do({
mv <- .$min_visits
mb <- .$min_bins
out <- cow_period %>%
mutate(pass = n_visits >= mv & n_bins >= mb) %>%
group_by(period_id, grazing_treatment) %>%
summarise(
n_qualified = sum(pass),
n_total = n(),
pct_qualified = round(100 * n_qualified / n_total, 1),
.groups = "drop"
) %>%
mutate(min_visits = mv, min_bins = mb)
out
}) %>%
ungroup() %>%
select(min_visits, min_bins, period_id, grazing_treatment, n_qualified, n_total, pct_qualified) %>%
arrange(min_visits, min_bins, period_id, grazing_treatment)
print(sens, n = nrow(sens))
## # A tibble: 60 × 7
## min_visits min_bins period_id grazing_treatment n_qualified n_total
## <dbl> <dbl> <int> <chr> <int> <int>
## 1 15 4 1 K1 16 18
## 2 15 4 1 K2 11 14
## 3 15 4 2 K1 3 18
## 4 15 4 2 K2 10 15
## 5 15 4 3 K1 9 18
## 6 15 4 3 K2 14 16
## 7 15 4 4 K1 18 19
## 8 15 4 4 K2 15 17
## 9 15 4 5 K1 16 20
## 10 15 4 5 K2 12 17
## 11 15 5 1 K1 15 18
## 12 15 5 1 K2 10 14
## 13 15 5 2 K1 2 18
## 14 15 5 2 K2 9 15
## 15 15 5 3 K1 8 18
## 16 15 5 3 K2 12 16
## 17 15 5 4 K1 17 19
## 18 15 5 4 K2 9 17
## 19 15 5 5 K1 11 20
## 20 15 5 5 K2 10 17
## 21 18 4 1 K1 14 18
## 22 18 4 1 K2 10 14
## 23 18 4 2 K1 1 18
## 24 18 4 2 K2 6 15
## 25 18 4 3 K1 6 18
## 26 18 4 3 K2 10 16
## 27 18 4 4 K1 18 19
## 28 18 4 4 K2 12 17
## 29 18 4 5 K1 11 20
## 30 18 4 5 K2 8 17
## 31 18 5 1 K1 13 18
## 32 18 5 1 K2 10 14
## 33 18 5 2 K1 1 18
## 34 18 5 2 K2 5 15
## 35 18 5 3 K1 6 18
## 36 18 5 3 K2 9 16
## 37 18 5 4 K1 17 19
## 38 18 5 4 K2 8 17
## 39 18 5 5 K1 7 20
## 40 18 5 5 K2 8 17
## 41 20 4 1 K1 10 18
## 42 20 4 1 K2 9 14
## 43 20 4 2 K1 1 18
## 44 20 4 2 K2 3 15
## 45 20 4 3 K1 2 18
## 46 20 4 3 K2 10 16
## 47 20 4 4 K1 17 19
## 48 20 4 4 K2 8 17
## 49 20 4 5 K1 11 20
## 50 20 4 5 K2 6 17
## 51 20 5 1 K1 10 18
## 52 20 5 1 K2 9 14
## 53 20 5 2 K1 1 18
## 54 20 5 2 K2 3 15
## 55 20 5 3 K1 2 18
## 56 20 5 3 K2 9 16
## 57 20 5 4 K1 16 19
## 58 20 5 4 K2 6 17
## 59 20 5 5 K1 7 20
## 60 20 5 5 K2 6 17
## # ℹ 1 more variable: pct_qualified <dbl>
write_csv(sens, "criteria_sensitivity_14day.csv")
# Use less stringent filtering criteria and compare
library(readr)
library(dplyr)
library(lme4)
library(performance)
# -----------------------------
# Repeatability (ICC) on filtered cow-period data
# -----------------------------
dat <- read_csv("analysis_3SD_14day_passed_only.csv", show_col_types = FALSE) %>%
mutate(
cow_id = factor(cow_id),
period_id = factor(period_id),
CH4_g_d = as.numeric(CH4_g_d)
) %>%
filter(is.finite(CH4_g_d), !is.na(cow_id), !is.na(period_id))
# Basic checks
cat("Raw filtered rows:", nrow(dat), "\n")
## Raw filtered rows: 1772
cat("Unique cows:", n_distinct(dat$cow_id), "\n")
## Unique cows: 29
cat("Unique periods:", n_distinct(dat$period_id), "\n\n")
## Unique periods: 5
# Mean CH4 per cow-period
cow_period <- dat %>%
group_by(cow_id, period_id) %>%
summarise(ch4_mean = mean(CH4_g_d, na.rm = TRUE), .groups = "drop")
cat("Cow-period rows:", nrow(cow_period), "\n")
## Cow-period rows: 69
cat("Cow-period counts by period:\n")
## Cow-period counts by period:
print(table(cow_period$period_id))
##
## 1 2 3 4 5
## 19 4 11 22 13
cat("\n")
# Optional: how many periods per cow (for stability context)
periods_per_cow <- cow_period %>%
group_by(cow_id) %>%
summarise(n_periods = n_distinct(period_id), .groups = "drop")
cat("Periods per cow summary:\n")
## Periods per cow summary:
print(summary(periods_per_cow$n_periods))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 1.000 2.000 2.379 3.000 5.000
cat("\n")
# Random-intercept model for repeatability
m_rep <- lmer(ch4_mean ~ (1 | cow_id), data = cow_period, REML = TRUE)
# ICC (repeatability)
icc_out <- performance::icc(m_rep)
cat("=== Repeatability (ICC) ===\n")
## === Repeatability (ICC) ===
print(icc_out)
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.519
## Unadjusted ICC: 0.519
cat("\n")
# Variance components
cat("=== Variance Components ===\n")
## === Variance Components ===
print(VarCorr(m_rep), comp = "Variance")
## Groups Name Variance
## cow_id (Intercept) 1050.30
## Residual 973.28
cat("\n")
# Save outputs
# ICC object is list-like; coerce safely
icc_tbl <- as.data.frame(icc_out)
write_csv(icc_tbl, "repeatability_icc_CH4_14day_filtered.csv")
var_tbl <- as.data.frame(VarCorr(m_rep))
write_csv(var_tbl, "repeatability_variance_components_CH4_14day_filtered.csv")
write_csv(cow_period, "repeatability_input_cow_period_CH4_14day_filtered.csv")
library(readr)
library(dplyr)
library(tidyr)
library(lme4)
library(performance)
# ---------------------------------------------------------
# Compare ICC across filter rules:
# 1) >=20 visits & >=5 bins
# 2) >=18 visits & >=5 bins
# 3) >=15 visits & >=4 bins
# ---------------------------------------------------------
# Load data with period + 4hr bins
dat <- read_csv("analysis_3SD_with_14day_periods_4hr_bins.csv", show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = toupper(trimws(grazing_treatment)),
period_id = as.integer(period_id),
CH4_g_d = as.numeric(CH4_g_d),
time_bin_4hr = as.character(time_bin_4hr)
) %>%
filter(
!is.na(period_id),
!is.na(time_bin_4hr),
is.finite(CH4_g_d),
grazing_treatment %in% c("K1", "K2")
)
# Candidate rules
rules <- tibble::tribble(
~rule_name, ~min_visits, ~min_bins,
"20v_5bins", 20, 5,
"18v_5bins", 18, 5,
"15v_4bins", 15, 4
)
# Per cow-period stats once
cow_period_stats <- dat %>%
group_by(cow_id, grazing_treatment, period_id) %>%
summarise(
n_visits = n(),
n_bins = n_distinct(time_bin_4hr),
ch4_mean = mean(CH4_g_d, na.rm = TRUE),
.groups = "drop"
)
run_rule_icc <- function(min_visits, min_bins, rule_name) {
cp <- cow_period_stats %>%
filter(n_visits >= min_visits, n_bins >= min_bins)
# Basic retention stats
n_cows <- n_distinct(cp$cow_id)
n_rows <- nrow(cp)
n_periods <- n_distinct(cp$period_id)
period_counts <- cp %>%
count(period_id, name = "n_cow_period_rows") %>%
complete(period_id = sort(unique(cow_period_stats$period_id)),
fill = list(n_cow_period_rows = 0)) %>%
arrange(period_id)
# Need enough data to fit ICC
if (n_cows < 2 || n_rows < 3) {
icc_adj <- NA_real_
icc_unadj <- NA_real_
var_cow <- NA_real_
var_resid <- NA_real_
fit_status <- "insufficient_data"
} else {
m <- lmer(ch4_mean ~ (1 | cow_id), data = cp, REML = TRUE)
icc_obj <- performance::icc(m)
icc_adj <- as.numeric(icc_obj$ICC_adjusted)
icc_unadj <- as.numeric(icc_obj$ICC_unadjusted)
vc <- as.data.frame(VarCorr(m))
var_cow <- vc$vcov[vc$grp == "cow_id"]
var_resid <- vc$vcov[vc$grp == "Residual"]
fit_status <- "ok"
}
summary_row <- tibble(
rule_name = rule_name,
min_visits = min_visits,
min_bins = min_bins,
n_cows_retained = n_cows,
n_cow_period_rows = n_rows,
n_periods_retained = n_periods,
ICC_adjusted = icc_adj,
ICC_unadjusted = icc_unadj,
var_between_cow = var_cow,
var_within_resid = var_resid,
fit_status = fit_status
)
period_row <- period_counts %>%
mutate(
rule_name = rule_name,
min_visits = min_visits,
min_bins = min_bins
) %>%
select(rule_name, min_visits, min_bins, period_id, n_cow_period_rows)
list(summary = summary_row, period = period_row, data = cp)
}
# Run all rules
res_list <- lapply(seq_len(nrow(rules)), function(i) {
run_rule_icc(rules$min_visits[i], rules$min_bins[i], rules$rule_name[i])
})
icc_summary <- bind_rows(lapply(res_list, `[[`, "summary")) %>%
arrange(desc(ICC_adjusted), desc(n_cows_retained), desc(n_cow_period_rows))
period_coverage <- bind_rows(lapply(res_list, `[[`, "period")) %>%
arrange(rule_name, period_id)
# Print
cat("=== ICC comparison across rules ===\n")
## === ICC comparison across rules ===
print(icc_summary, n = nrow(icc_summary))
## # A tibble: 3 × 11
## rule_name min_visits min_bins n_cows_retained n_cow_period_rows
## <chr> <dbl> <dbl> <int> <int>
## 1 15v_4bins 15 4 35 124
## 2 20v_5bins 20 5 29 69
## 3 18v_5bins 18 5 30 84
## # ℹ 6 more variables: n_periods_retained <int>, ICC_adjusted <dbl>,
## # ICC_unadjusted <dbl>, var_between_cow <dbl>, var_within_resid <dbl>,
## # fit_status <chr>
cat("\n=== Period coverage (cow-period rows) by rule ===\n")
##
## === Period coverage (cow-period rows) by rule ===
print(period_coverage, n = nrow(period_coverage))
## # A tibble: 15 × 5
## rule_name min_visits min_bins period_id n_cow_period_rows
## <chr> <dbl> <dbl> <int> <int>
## 1 15v_4bins 15 4 1 27
## 2 15v_4bins 15 4 2 13
## 3 15v_4bins 15 4 3 23
## 4 15v_4bins 15 4 4 33
## 5 15v_4bins 15 4 5 28
## 6 18v_5bins 18 5 1 23
## 7 18v_5bins 18 5 2 6
## 8 18v_5bins 18 5 3 15
## 9 18v_5bins 18 5 4 25
## 10 18v_5bins 18 5 5 15
## 11 20v_5bins 20 5 1 19
## 12 20v_5bins 20 5 2 4
## 13 20v_5bins 20 5 3 11
## 14 20v_5bins 20 5 4 22
## 15 20v_5bins 20 5 5 13
# Save outputs
write_csv(icc_summary, "ICC_comparison_by_filter_rule.csv")
write_csv(period_coverage, "ICC_period_coverage_by_filter_rule.csv")
# Also save each retained cow-period dataset
for (obj in res_list) {
nm <- obj$summary$rule_name[1]
write_csv(obj$data, paste0("cow_period_retained_", nm, ".csv"))
}
# 15 visits, 4 time bin selected. Check cows that failed
library(readr)
library(dplyr)
library(writexl)
# Chosen rule
min_visits <- 15
min_bins <- 4
# Load data
dat <- read_csv("analysis_3SD_with_14day_periods_4hr_bins.csv", show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = toupper(trimws(grazing_treatment)),
period_id = as.integer(period_id),
time_bin_4hr = as.character(time_bin_4hr)
) %>%
filter(!is.na(period_id), !is.na(time_bin_4hr), grazing_treatment %in% c("K1", "K2"))
# Cow-period stats
cow_period_stats <- dat %>%
group_by(cow_id, grazing_treatment, period_id) %>%
summarise(
n_visits = n(),
n_bins_covered = n_distinct(time_bin_4hr),
bins_covered = paste(sort(unique(time_bin_4hr)), collapse = "; "),
.groups = "drop"
) %>%
mutate(
pass_visits = n_visits >= min_visits,
pass_bins = n_bins_covered >= min_bins,
pass_all = pass_visits & pass_bins,
fail_reason = case_when(
!pass_visits & !pass_bins ~ "Fail visits + bins",
!pass_visits & pass_bins ~ "Fail visits only",
pass_visits & !pass_bins ~ "Fail bins only",
TRUE ~ "Pass"
)
)
# Passed and failed cow-period rows
passed_cow_periods <- cow_period_stats %>%
filter(pass_all) %>%
arrange(period_id, grazing_treatment, cow_id)
failed_cow_periods <- cow_period_stats %>%
filter(!pass_all) %>%
arrange(period_id, grazing_treatment, cow_id)
# IDs retained vs removed at cow level
all_ids <- sort(unique(cow_period_stats$cow_id))
retained_ids <- sort(unique(passed_cow_periods$cow_id))
removed_ids <- setdiff(all_ids, retained_ids)
retained_id_table <- tibble(cow_id = retained_ids) %>%
left_join(
dat %>% distinct(cow_id, grazing_treatment),
by = "cow_id"
) %>%
arrange(grazing_treatment, cow_id)
removed_id_table <- tibble(cow_id = removed_ids) %>%
left_join(
dat %>% distinct(cow_id, grazing_treatment),
by = "cow_id"
) %>%
arrange(grazing_treatment, cow_id)
# Period-level retained counts
retained_counts_period <- passed_cow_periods %>%
count(period_id, grazing_treatment, name = "n_retained_cow_period") %>%
arrange(period_id, grazing_treatment)
# Final filtered observation-level dataset
dat_passed <- dat %>%
inner_join(
passed_cow_periods %>% select(cow_id, grazing_treatment, period_id),
by = c("cow_id", "grazing_treatment", "period_id")
)
# Print summary
cat("Rule used: >=", min_visits, " visits and >=", min_bins, " bins per cow-period\n\n")
## Rule used: >= 15 visits and >= 4 bins per cow-period
cat("Total unique cows:", length(all_ids), "\n")
## Total unique cows: 37
cat("Retained unique cows:", length(retained_ids), "\n")
## Retained unique cows: 35
cat("Removed unique cows:", length(removed_ids), "\n\n")
## Removed unique cows: 2
cat("Removed cow IDs:\n")
## Removed cow IDs:
if (nrow(removed_id_table) == 0) {
cat("None\n")
} else {
print(removed_id_table, n = nrow(removed_id_table))
}
## # A tibble: 2 × 2
## cow_id grazing_treatment
## <chr> <chr>
## 1 338D K1
## 2 422J K2
cat("\nRetained cow-period counts by period x treatment:\n")
##
## Retained cow-period counts by period x treatment:
print(retained_counts_period, n = nrow(retained_counts_period))
## # A tibble: 10 × 3
## period_id grazing_treatment n_retained_cow_period
## <int> <chr> <int>
## 1 1 K1 16
## 2 1 K2 11
## 3 2 K1 3
## 4 2 K2 10
## 5 3 K1 9
## 6 3 K2 14
## 7 4 K1 18
## 8 4 K2 15
## 9 5 K1 16
## 10 5 K2 12
# Save CSVs
write_csv(cow_period_stats, "cow_period_stats_15v_4bins.csv")
write_csv(passed_cow_periods, "passed_cow_periods_15v_4bins.csv")
write_csv(failed_cow_periods, "failed_cow_periods_15v_4bins.csv")
write_csv(retained_id_table, "retained_cow_ids_15v_4bins.csv")
write_csv(removed_id_table, "removed_cow_ids_15v_4bins.csv")
write_csv(retained_counts_period, "retained_counts_by_period_treatment_15v_4bins.csv")
write_csv(dat_passed, "analysis_3SD_14day_passed_only_15v_4bins.csv")
# Save Excel workbook
write_xlsx(
list(
cow_period_stats = cow_period_stats,
passed_cow_periods = passed_cow_periods,
failed_cow_periods = failed_cow_periods,
retained_cow_ids = retained_id_table,
removed_cow_ids = removed_id_table,
retained_counts_period_treatment = retained_counts_period,
data_passed_only = dat_passed
),
"TOD_14day_filtering_15v_4bins_results.xlsx"
)
# Compute mean CH4 and CO2 per period:
library(readr)
library(dplyr)
library(writexl)
# Use the filtered dataset from chosen rule (15 visits + 4 bins)
dat <- read_csv("analysis_3SD_14day_passed_only_15v_4bins.csv", show_col_types = FALSE) %>%
mutate(
period_id = as.integer(period_id),
grazing_treatment = toupper(trimws(grazing_treatment)),
CH4_g_d = as.numeric(CH4_g_d),
CO2_g_d = as.numeric(CO2_g_d)
) %>%
filter(!is.na(period_id), grazing_treatment %in% c("K1", "K2"))
# ---------------------------------------
# Mean CH4 and CO2 by period x treatment
# ---------------------------------------
period_means <- dat %>%
group_by(period_id, grazing_treatment) %>%
summarise(
n_obs = n(),
n_cows = n_distinct(cow_id),
CH4_mean = mean(CH4_g_d, na.rm = TRUE),
CH4_sd = sd(CH4_g_d, na.rm = TRUE),
CH4_se = CH4_sd / sqrt(n_obs),
CH4_lcl_95 = CH4_mean - 1.96 * CH4_se,
CH4_ucl_95 = CH4_mean + 1.96 * CH4_se,
CO2_mean = mean(CO2_g_d, na.rm = TRUE),
CO2_sd = sd(CO2_g_d, na.rm = TRUE),
CO2_se = CO2_sd / sqrt(n_obs),
CO2_lcl_95 = CO2_mean - 1.96 * CO2_se,
CO2_ucl_95 = CO2_mean + 1.96 * CO2_se,
.groups = "drop"
) %>%
arrange(period_id, grazing_treatment)
# Optional: pooled across treatment (period only)
period_means_overall <- dat %>%
group_by(period_id) %>%
summarise(
n_obs = n(),
n_cows = n_distinct(cow_id),
CH4_mean = mean(CH4_g_d, na.rm = TRUE),
CH4_sd = sd(CH4_g_d, na.rm = TRUE),
CH4_se = CH4_sd / sqrt(n_obs),
CH4_lcl_95 = CH4_mean - 1.96 * CH4_se,
CH4_ucl_95 = CH4_mean + 1.96 * CH4_se,
CO2_mean = mean(CO2_g_d, na.rm = TRUE),
CO2_sd = sd(CO2_g_d, na.rm = TRUE),
CO2_se = CO2_sd / sqrt(n_obs),
CO2_lcl_95 = CO2_mean - 1.96 * CO2_se,
CO2_ucl_95 = CO2_mean + 1.96 * CO2_se,
.groups = "drop"
) %>%
arrange(period_id)
# Print
cat("=== Mean CH4 and CO2 by Period x Treatment (15v4bins) ===\n")
## === Mean CH4 and CO2 by Period x Treatment (15v4bins) ===
print(period_means, n = nrow(period_means))
## # A tibble: 10 × 14
## period_id grazing_treatment n_obs n_cows CH4_mean CH4_sd CH4_se CH4_lcl_95
## <int> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 K1 354 16 325. 90.2 4.79 316.
## 2 1 K2 309 11 372. 85.8 4.88 363.
## 3 2 K1 54 3 266. 78.0 10.6 245.
## 4 2 K2 184 10 339. 94.4 6.96 325.
## 5 3 K1 162 9 311. 87.4 6.86 297.
## 6 3 K2 306 14 337. 90.3 5.16 327.
## 7 4 K1 521 18 329. 76.0 3.33 323.
## 8 4 K2 298 15 342. 87.1 5.05 332.
## 9 5 K1 336 16 273. 88.9 4.85 264.
## 10 5 K2 235 12 308. 73.4 4.79 299.
## # ℹ 6 more variables: CH4_ucl_95 <dbl>, CO2_mean <dbl>, CO2_sd <dbl>,
## # CO2_se <dbl>, CO2_lcl_95 <dbl>, CO2_ucl_95 <dbl>
cat("\n=== Mean CH4 and CO2 by Period (Overall) ===\n")
##
## === Mean CH4 and CO2 by Period (Overall) ===
print(period_means_overall, n = nrow(period_means_overall))
## # A tibble: 5 × 13
## period_id n_obs n_cows CH4_mean CH4_sd CH4_se CH4_lcl_95 CH4_ucl_95 CO2_mean
## <int> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 663 27 347. 91.2 3.54 340. 354. 11325.
## 2 2 238 13 322. 95.8 6.21 310. 335. 10891.
## 3 3 468 23 328. 90.0 4.16 320. 336. 10490.
## 4 4 819 33 334. 80.4 2.81 328. 339. 10652.
## 5 5 571 28 288. 84.5 3.54 281. 295. 9773.
## # ℹ 4 more variables: CO2_sd <dbl>, CO2_se <dbl>, CO2_lcl_95 <dbl>,
## # CO2_ucl_95 <dbl>
# Save CSV
write_csv(period_means, "period_means_CH4_CO2_by_treatment_15v4bins.csv")
write_csv(period_means_overall, "period_means_CH4_CO2_overall_15v4bins.csv")
# Save Excel
write_xlsx(
list(
period_by_treatment = period_means,
period_overall = period_means_overall
),
"period_means_CH4_CO2_15v4bins.xlsx"
)
# Compare and choose the best Covariance structure
library(readr)
library(dplyr)
library(nlme)
# ---------------------------------------
# Covariance structure comparison (15v4bins)
# Includes corrected AR1 with within-cow index
# ---------------------------------------
dat <- read_csv("analysis_3SD_14day_passed_only_15v_4bins.csv", show_col_types = FALSE) %>%
mutate(
cow_id = factor(cow_id),
grazing_treatment = factor(toupper(trimws(grazing_treatment)), levels = c("K1", "K2")),
period_id = factor(period_id, levels = as.character(1:5)),
start_time_num = as.numeric(start_time),
CH4_g_d = as.numeric(CH4_g_d),
CO2_g_d = as.numeric(CO2_g_d)
) %>%
filter(
!is.na(cow_id), !is.na(grazing_treatment), !is.na(period_id),
is.finite(start_time_num), is.finite(CH4_g_d), is.finite(CO2_g_d)
) %>%
arrange(cow_id, start_time_num) %>%
group_by(cow_id) %>%
mutate(obs_index = row_number()) %>% # AR1-safe unique integer index within cow
ungroup()
calc_caic <- function(mod) {
ll <- as.numeric(logLik(mod))
np <- attr(logLik(mod), "df")
n <- nrow(mod$data)
nf <- length(fixef(mod))
-2 * ll + np * (log(n - nf) + 1)
}
fit_cov_set <- function(resp) {
fml <- as.formula(paste0(resp, " ~ grazing_treatment + period_id + grazing_treatment:period_id"))
safe_fit <- function(expr) {
tryCatch(expr, error = function(e) structure(list(err = conditionMessage(e)), class = "fit_err"))
}
mod_ar1 <- safe_fit(
lme(
fixed = fml,
random = ~1 | cow_id,
data = dat,
correlation = corAR1(form = ~ obs_index | cow_id),
method = "REML",
na.action = na.exclude,
control = lmeControl(msMaxIter = 200, opt = "optim")
)
)
mod_cs <- safe_fit(
lme(
fixed = fml,
random = ~1 | cow_id,
data = dat,
correlation = corCompSymm(form = ~ obs_index | cow_id),
method = "REML",
na.action = na.exclude,
control = lmeControl(msMaxIter = 200, opt = "optim")
)
)
mod_pow <- safe_fit(
lme(
fixed = fml,
random = ~1 | cow_id,
data = dat,
correlation = corExp(form = ~ start_time_num | cow_id),
method = "REML",
na.action = na.exclude,
control = lmeControl(msMaxIter = 200, opt = "optim")
)
)
mods <- list(AR1 = mod_ar1, CompSymm = mod_cs, SP_POW = mod_pow)
out <- bind_rows(lapply(names(mods), function(s) {
m <- mods[[s]]
if (inherits(m, "fit_err")) {
return(data.frame(
response = resp, structure = s,
AIC = NA, BIC = NA, CAIC = NA, logLik = NA, npar = NA,
status = paste("failed:", m$err), stringsAsFactors = FALSE
))
}
data.frame(
response = resp, structure = s,
AIC = AIC(m), BIC = BIC(m), CAIC = calc_caic(m),
logLik = as.numeric(logLik(m)),
npar = attr(logLik(m), "df"),
status = "ok",
stringsAsFactors = FALSE
)
}))
ok <- out %>% filter(status == "ok")
if (nrow(ok) > 0) {
out <- out %>%
mutate(
delta_AIC = ifelse(status == "ok", AIC - min(ok$AIC), NA_real_),
delta_BIC = ifelse(status == "ok", BIC - min(ok$BIC), NA_real_),
delta_CAIC = ifelse(status == "ok", CAIC - min(ok$CAIC), NA_real_)
)
} else {
out$delta_AIC <- NA_real_
out$delta_BIC <- NA_real_
out$delta_CAIC <- NA_real_
}
out
}
res <- bind_rows(
fit_cov_set("CH4_g_d"),
fit_cov_set("CO2_g_d")
)
print(res)
## response structure AIC BIC CAIC logLik npar status delta_AIC
## 1 CH4_g_d AR1 31959.56 32036.51 32049.51 -15966.78 13 ok 0.00000
## 2 CH4_g_d CompSymm 31989.27 32066.22 32079.22 -15981.63 13 ok 29.70721
## 3 CH4_g_d SP_POW 31987.14 32064.09 32077.09 -15980.57 13 ok 27.58209
## 4 CO2_g_d AR1 47101.83 47178.78 47191.78 -23537.92 13 ok 0.00000
## 5 CO2_g_d CompSymm 47155.22 47232.17 47245.17 -23564.61 13 ok 53.39202
## 6 CO2_g_d SP_POW 47155.22 47232.17 47245.17 -23564.61 13 ok 53.38580
## delta_BIC delta_CAIC
## 1 0.00000 0.00000
## 2 29.70721 29.70721
## 3 27.58209 27.58209
## 4 0.00000 0.00000
## 5 53.39202 53.39202
## 6 53.38580 53.38580
cat("\nBest by CAIC among successful fits:\n")
##
## Best by CAIC among successful fits:
print(
res %>%
filter(status == "ok") %>%
group_by(response) %>%
slice_min(order_by = CAIC, n = 1, with_ties = FALSE) %>%
select(response, structure, AIC, BIC, CAIC)
)
## # A tibble: 2 × 5
## # Groups: response [2]
## response structure AIC BIC CAIC
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 CH4_g_d AR1 31960. 32037. 32050.
## 2 CO2_g_d AR1 47102. 47179. 47192.
write_csv(res, "covariance_structure_comparison_15v4bins_with_AR1fix.csv")
#Check for model stability
library(readr)
library(dplyr)
library(nlme)
# -----------------------------
# Load filtered data (15v4bins)
# -----------------------------
dat <- read_csv("analysis_3SD_14day_passed_only_15v_4bins.csv", show_col_types = FALSE) %>%
mutate(
cow_id = factor(cow_id),
grazing_treatment = factor(toupper(trimws(grazing_treatment)), levels = c("K1", "K2")),
period_id = factor(period_id, levels = as.character(1:5)),
start_time_num = as.numeric(start_time),
CH4_g_d = as.numeric(CH4_g_d),
CO2_g_d = as.numeric(CO2_g_d)
) %>%
filter(
!is.na(cow_id), !is.na(grazing_treatment), !is.na(period_id),
is.finite(start_time_num), is.finite(CH4_g_d), is.finite(CO2_g_d)
) %>%
arrange(cow_id, start_time_num) %>%
group_by(cow_id) %>%
mutate(obs_index = row_number()) %>% # for AR1 within cow
ungroup()
# -----------------------------
# Fit selected AR1 models
# -----------------------------
fit_ar1 <- function(resp) {
fml <- as.formula(paste0(resp, " ~ grazing_treatment + period_id + grazing_treatment:period_id"))
m <- lme(
fixed = fml,
random = ~1 | cow_id,
data = dat,
correlation = corAR1(form = ~ obs_index | cow_id),
method = "REML",
na.action = na.exclude,
control = lmeControl(msMaxIter = 200, opt = "optim")
)
# Extract key diagnostics
vc <- VarCorr(m)
sigma_resid <- m$sigma
ar1_phi <- as.numeric(coef(m$modelStruct$corStruct, unconstrained = FALSE))
# Basic checks
check_tbl <- tibble::tibble(
response = resp,
n_obs = nrow(dat),
n_cows = dplyr::n_distinct(dat$cow_id),
AIC = AIC(m),
BIC = BIC(m),
logLik = as.numeric(logLik(m)),
residual_sigma = sigma_resid,
ar1_phi = ar1_phi,
ar1_in_valid_range = (ar1_phi > -1 & ar1_phi < 1),
random_intercept_var = as.numeric(vc[1, "Variance"]),
random_var_positive = as.numeric(vc[1, "Variance"]) > 0
)
list(model = m, checks = check_tbl)
}
ch4_fit <- fit_ar1("CH4_g_d")
co2_fit <- fit_ar1("CO2_g_d")
checks <- bind_rows(ch4_fit$checks, co2_fit$checks)
cat("=== AR1 MODEL STABILITY CHECKS ===\n")
## === AR1 MODEL STABILITY CHECKS ===
print(checks)
## # A tibble: 2 × 11
## response n_obs n_cows AIC BIC logLik residual_sigma ar1_phi
## <chr> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 CH4_g_d 2759 35 31960. 32037. -15967. 79.1 0.106
## 2 CO2_g_d 2759 35 47102. 47179. -23538. 1242. 0.141
## # ℹ 3 more variables: ar1_in_valid_range <lgl>, random_intercept_var <dbl>,
## # random_var_positive <lgl>
cat("\n=== CH4 model summary ===\n")
##
## === CH4 model summary ===
print(summary(ch4_fit$model))
## Linear mixed-effects model fit by REML
## Data: dat
## AIC BIC logLik
## 31959.56 32036.51 -15966.78
##
## Random effects:
## Formula: ~1 | cow_id
## (Intercept) Residual
## StdDev: 33.80667 79.09526
##
## Correlation Structure: AR(1)
## Formula: ~obs_index | cow_id
## Parameter estimate(s):
## Phi
## 0.1057705
## Fixed effects: list(fml)
## Value Std.Error DF t-value p-value
## (Intercept) 326.4029 9.185077 2716 35.53622 0.0000
## grazing_treatmentK2 49.0961 13.608625 33 3.60772 0.0010
## period_id2 -36.8234 13.518622 2716 -2.72390 0.0065
## period_id3 -13.1066 8.486922 2716 -1.54432 0.1226
## period_id4 2.2532 6.104921 2716 0.36908 0.7121
## period_id5 -52.2423 6.860083 2716 -7.61540 0.0000
## grazing_treatmentK2:period_id2 -3.2627 15.905696 2716 -0.20513 0.8375
## grazing_treatmentK2:period_id3 -30.3006 11.219057 2716 -2.70082 0.0070
## grazing_treatmentK2:period_id4 -40.7164 9.620034 2716 -4.23246 0.0000
## grazing_treatmentK2:period_id5 -14.2470 10.306304 2716 -1.38236 0.1670
## Correlation:
## (Intr) grz_K2 prd_d2 prd_d3 prd_d4 prd_d5
## grazing_treatmentK2 -0.675
## period_id2 -0.167 0.113
## period_id3 -0.269 0.181 0.223
## period_id4 -0.407 0.274 0.255 0.410
## period_id5 -0.373 0.252 0.222 0.356 0.535
## grazing_treatmentK2:period_id2 0.142 -0.219 -0.850 -0.189 -0.217 -0.189
## grazing_treatmentK2:period_id3 0.203 -0.320 -0.169 -0.756 -0.310 -0.269
## grazing_treatmentK2:period_id4 0.258 -0.397 -0.162 -0.260 -0.635 -0.339
## grazing_treatmentK2:period_id5 0.249 -0.355 -0.148 -0.237 -0.356 -0.666
## g_K2:_2 g_K2:_3 g_K2:_4
## grazing_treatmentK2
## period_id2
## period_id3
## period_id4
## period_id5
## grazing_treatmentK2:period_id2
## grazing_treatmentK2:period_id3 0.293
## grazing_treatmentK2:period_id4 0.317 0.460
## grazing_treatmentK2:period_id5 0.279 0.410 0.492
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -3.46835653 -0.67306515 -0.02186649 0.62079946 3.52945008
##
## Number of Observations: 2759
## Number of Groups: 35
cat("\n=== CO2 model summary ===\n")
##
## === CO2 model summary ===
print(summary(co2_fit$model))
## Linear mixed-effects model fit by REML
## Data: dat
## AIC BIC logLik
## 47101.83 47178.78 -23537.92
##
## Random effects:
## Formula: ~1 | cow_id
## (Intercept) Residual
## StdDev: 837.2731 1242.23
##
## Correlation Structure: AR(1)
## Formula: ~obs_index | cow_id
## Parameter estimate(s):
## Phi
## 0.1414196
## Fixed effects: list(fml)
## Value Std.Error DF t-value p-value
## (Intercept) 11288.444 208.28320 2716 54.19758 0.0000
## grazing_treatmentK2 314.486 308.36924 33 1.01984 0.3152
## period_id2 -361.216 219.77902 2716 -1.64354 0.1004
## period_id3 -799.809 137.85735 2716 -5.80171 0.0000
## period_id4 -625.351 99.30279 2716 -6.29741 0.0000
## period_id5 -1574.090 111.79395 2716 -14.08028 0.0000
## grazing_treatmentK2:period_id2 -217.535 258.56148 2716 -0.84133 0.4002
## grazing_treatmentK2:period_id3 -283.765 182.42591 2716 -1.55551 0.1199
## grazing_treatmentK2:period_id4 -453.201 156.74402 2716 -2.89134 0.0039
## grazing_treatmentK2:period_id5 -65.925 167.76727 2716 -0.39295 0.6944
## Correlation:
## (Intr) grz_K2 prd_d2 prd_d3 prd_d4 prd_d5
## grazing_treatmentK2 -0.675
## period_id2 -0.119 0.080
## period_id3 -0.192 0.129 0.225
## period_id4 -0.292 0.198 0.253 0.409
## period_id5 -0.270 0.182 0.220 0.353 0.534
## grazing_treatmentK2:period_id2 0.101 -0.157 -0.850 -0.191 -0.215 -0.187
## grazing_treatmentK2:period_id3 0.145 -0.230 -0.170 -0.756 -0.309 -0.267
## grazing_treatmentK2:period_id4 0.185 -0.287 -0.161 -0.259 -0.634 -0.339
## grazing_treatmentK2:period_id5 0.180 -0.256 -0.147 -0.235 -0.356 -0.666
## g_K2:_2 g_K2:_3 g_K2:_4
## grazing_treatmentK2
## period_id2
## period_id3
## period_id4
## period_id5
## grazing_treatmentK2:period_id2
## grazing_treatmentK2:period_id3 0.295
## grazing_treatmentK2:period_id4 0.316 0.461
## grazing_treatmentK2:period_id5 0.277 0.409 0.492
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -3.45605348 -0.66720951 -0.05787612 0.66611221 3.81234532
##
## Number of Observations: 2759
## Number of Groups: 35
# Save checks
write_csv(checks, "ar1_model_stability_checks_15v4bins.csv")
# Check fixed effects using AR1 model
library(readr)
library(dplyr)
library(nlme)
library(emmeans)
# -----------------------------
# Load filtered data
# -----------------------------
dat <- read_csv("analysis_3SD_14day_passed_only_15v_4bins.csv", show_col_types = FALSE) %>%
mutate(
cow_id = factor(cow_id),
grazing_treatment = factor(toupper(trimws(grazing_treatment)), levels = c("K1", "K2")),
period_id = factor(period_id, levels = as.character(1:5)),
start_time_num = as.numeric(start_time),
CH4_g_d = as.numeric(CH4_g_d),
CO2_g_d = as.numeric(CO2_g_d)
) %>%
filter(
!is.na(cow_id), !is.na(grazing_treatment), !is.na(period_id),
is.finite(start_time_num), is.finite(CH4_g_d), is.finite(CO2_g_d)
) %>%
arrange(cow_id, start_time_num) %>%
group_by(cow_id) %>%
mutate(obs_index = row_number()) %>%
ungroup()
fit_ar1 <- function(resp) {
fml <- as.formula(paste0(resp, " ~ grazing_treatment * period_id"))
lme(
fixed = fml,
random = ~1 | cow_id,
correlation = corAR1(form = ~ obs_index | cow_id),
data = dat,
method = "REML",
na.action = na.exclude,
control = lmeControl(msMaxIter = 200, opt = "optim")
)
}
# -----------------------------
# Fit AR1 models
# -----------------------------
m_ch4 <- fit_ar1("CH4_g_d")
m_co2 <- fit_ar1("CO2_g_d")
# -----------------------------
# Fixed effects significance
# -----------------------------
cat("=== CH4: Fixed effects (Type-like ANOVA table) ===\n")
## === CH4: Fixed effects (Type-like ANOVA table) ===
print(anova(m_ch4))
## numDF denDF F-value p-value
## (Intercept) 1 2716 2901.7546 <.0001
## grazing_treatment 1 33 5.2967 0.0278
## period_id 4 2716 37.0500 <.0001
## grazing_treatment:period_id 4 2716 5.3236 0.0003
cat("\n=== CO2: Fixed effects (Type-like ANOVA table) ===\n")
##
## === CO2: Fixed effects (Type-like ANOVA table) ===
print(anova(m_co2))
## numDF denDF F-value p-value
## (Intercept) 1 2716 5375.457 <.0001
## grazing_treatment 1 33 0.353 0.5563
## period_id 4 2716 96.904 <.0001
## grazing_treatment:period_id 4 2716 2.515 0.0397
# -----------------------------
# Interaction-focused contrasts
# K1 vs K2 within each period
# -----------------------------
cat("\n=== CH4: K1 vs K2 within each period ===\n")
##
## === CH4: K1 vs K2 within each period ===
emm_ch4 <- emmeans(m_ch4, ~ grazing_treatment | period_id)
ch4_contr <- contrast(emm_ch4, method = "revpairwise", adjust = "tukey")
print(ch4_contr)
## period_id = 1:
## contrast estimate SE df t.ratio p.value
## K2 - K1 49.10 13.6 33 3.608 0.0010
##
## period_id = 2:
## contrast estimate SE df t.ratio p.value
## K2 - K1 45.83 18.5 33 2.473 0.0187
##
## period_id = 3:
## contrast estimate SE df t.ratio p.value
## K2 - K1 18.80 14.6 33 1.287 0.2070
##
## period_id = 4:
## contrast estimate SE df t.ratio p.value
## K2 - K1 8.38 13.2 33 0.636 0.5294
##
## period_id = 5:
## contrast estimate SE df t.ratio p.value
## K2 - K1 34.85 13.9 33 2.516 0.0169
##
## Degrees-of-freedom method: containment
cat("\n=== CO2: K1 vs K2 within each period ===\n")
##
## === CO2: K1 vs K2 within each period ===
emm_co2 <- emmeans(m_co2, ~ grazing_treatment | period_id)
co2_contr <- contrast(emm_co2, method = "revpairwise", adjust = "tukey")
print(co2_contr)
## period_id = 1:
## contrast estimate SE df t.ratio p.value
## K2 - K1 314.5 308 33 1.020 0.3152
##
## period_id = 2:
## contrast estimate SE df t.ratio p.value
## K2 - K1 97.0 370 33 0.262 0.7950
##
## period_id = 3:
## contrast estimate SE df t.ratio p.value
## K2 - K1 30.7 320 33 0.096 0.9241
##
## period_id = 4:
## contrast estimate SE df t.ratio p.value
## K2 - K1 -138.7 303 33 -0.458 0.6503
##
## period_id = 5:
## contrast estimate SE df t.ratio p.value
## K2 - K1 248.6 311 33 0.799 0.4300
##
## Degrees-of-freedom method: containment
# -----------------------------
# Save tables
# -----------------------------
ch4_anova <- as.data.frame(anova(m_ch4))
co2_anova <- as.data.frame(anova(m_co2))
ch4_anova$term <- rownames(ch4_anova)
co2_anova$term <- rownames(co2_anova)
ch4_contr_df <- as.data.frame(ch4_contr)
co2_contr_df <- as.data.frame(co2_contr)
readr::write_csv(ch4_anova, "CH4_AR1_fixed_effects_anova.csv")
readr::write_csv(co2_anova, "CO2_AR1_fixed_effects_anova.csv")
readr::write_csv(ch4_contr_df, "CH4_AR1_K1vsK2_by_period.csv")
readr::write_csv(co2_contr_df, "CO2_AR1_K1vsK2_by_period.csv")
#Checking fixed effect using Type 3 ANOVA for K1 and K2.
library(readr)
library(dplyr)
library(gt)
library(openxlsx)
fmt_p <- function(p) ifelse(is.na(p), NA, ifelse(p < 0.001, "<0.001", sprintf("%.3f", p)))
sig_code <- function(p) case_when(
is.na(p) ~ "",
p < 0.001 ~ "***",
p < 0.01 ~ "**",
p < 0.05 ~ "*",
p < 0.10 ~ ".",
TRUE ~ ""
)
clean_nm <- function(x) {
x |>
tolower() |>
gsub("[^a-z0-9]+", "_", x = _) |>
gsub("^_|_$", "", x = _)
}
read_aov_clean <- function(path, response_label) {
x <- read_csv(path, show_col_types = FALSE)
# Keep original names, but create cleaned map for robust matching
nm_orig <- names(x)
nm_clean <- clean_nm(nm_orig)
pick_col <- function(patterns) {
idx <- which(vapply(patterns, function(p) any(grepl(p, nm_clean)), logical(1)))
if (length(idx) == 0) return(NA_integer_)
which(grepl(patterns[idx[1]], nm_clean))[1]
}
i_term <- pick_col(c("^effect$", "^term$"))
i_numdf <- pick_col(c("^numdf$", "^num_df$"))
i_dendf <- pick_col(c("^dendf$", "^den_df$"))
i_f <- pick_col(c("^f_value$", "^f$"))
i_p <- pick_col(c("^p_value$", "^p$", "^pr_f$"))
if (any(is.na(c(i_term, i_numdf, i_dendf, i_f, i_p)))) {
cat("\nColumn detection failed for:", path, "\n")
print(data.frame(original = nm_orig, cleaned = nm_clean))
stop("Could not identify required ANOVA columns. See printed column map.")
}
x %>%
mutate(response = response_label) %>%
transmute(
response = response,
term = .data[[nm_orig[i_term]]],
num_df = as.numeric(.data[[nm_orig[i_numdf]]]),
den_df = as.numeric(.data[[nm_orig[i_dendf]]]),
F_value = as.numeric(.data[[nm_orig[i_f]]]),
p_raw = suppressWarnings(as.numeric(.data[[nm_orig[i_p]]])) # handles "<.0001" as NA
) %>%
mutate(
p_value = ifelse(is.na(p_raw), as.character(.data[[nm_orig[i_p]]]), fmt_p(p_raw)),
sig = sig_code(p_raw)
) %>%
select(-p_raw)
}
aov_tbl <- bind_rows(
read_aov_clean("CH4_AR1_fixed_effects_anova.csv", "CH4_g_d"),
read_aov_clean("CO2_AR1_fixed_effects_anova.csv", "CO2_g_d")
)
gt_aov <- aov_tbl %>%
gt(groupname_col = "response") %>%
tab_header(
title = "Fixed Effects (AR1 Model)",
subtitle = "Type-like ANOVA"
) %>%
fmt_number(columns = c(num_df, den_df), decimals = 0) %>%
fmt_number(columns = F_value, decimals = 3)
gtsave(gt_aov, "table_fixed_effects_anova.html")
wb <- createWorkbook()
addWorksheet(wb, "Fixed_Effects_ANOVA")
writeData(wb, "Fixed_Effects_ANOVA", aov_tbl)
saveWorkbook(wb, "table_fixed_effects_anova.xlsx", overwrite = TRUE)
print(aov_tbl)
## # A tibble: 8 × 7
## response term num_df den_df F_value p_value sig
## <chr> <chr> <dbl> <dbl> <dbl> <chr> <chr>
## 1 CH4_g_d (Intercept) 1 2716 2902. <0.001 "***"
## 2 CH4_g_d grazing_treatment 1 33 5.30 0.028 "*"
## 3 CH4_g_d period_id 4 2716 37.1 <0.001 "***"
## 4 CH4_g_d grazing_treatment:period_id 4 2716 5.32 <0.001 "***"
## 5 CO2_g_d (Intercept) 1 2716 5375. <0.001 "***"
## 6 CO2_g_d grazing_treatment 1 33 0.353 0.556 ""
## 7 CO2_g_d period_id 4 2716 96.9 <0.001 "***"
## 8 CO2_g_d grazing_treatment:period_id 4 2716 2.51 0.040 "*"
library(dplyr)
library(tibble)
library(readr)
anova_tbl <- tribble(
~response, ~term, ~num_df, ~den_df, ~F_value, ~p_value, ~sig,
"CH4_g_d", "(Intercept)", 1, 2716, 2901.754598, "<0.001", "***",
"CH4_g_d", "grazing_treatment", 1, 33, 5.296669551, "0.028", "*",
"CH4_g_d", "period_id", 4, 2716, 37.05002376, "<0.001", "***",
"CH4_g_d", "grazing_treatment:period_id", 4, 2716, 5.323578645, "<0.001", "***",
"CO2_g_d", "(Intercept)", 1, 2716, 5375.456905, "<0.001", "***",
"CO2_g_d", "grazing_treatment", 1, 33, 0.3533128301, "0.556", "",
"CO2_g_d", "period_id", 4, 2716, 96.90369902, "<0.001", "***",
"CO2_g_d", "grazing_treatment:period_id", 4, 2716, 2.514783258, "0.040", "*"
)
anova_tbl2 <- anova_tbl %>%
mutate(
p_num = suppressWarnings(as.numeric(gsub("^<", "", p_value))),
interpretation = case_when(
grepl("^<", p_value) ~ "Statistically significant",
!is.na(p_num) & p_num < 0.05 ~ "Statistically significant",
!is.na(p_num) & p_num >= 0.05 ~ "Not statistically significant",
TRUE ~ "Check p-value format"
)
) %>%
select(-p_num)
print(anova_tbl2)
## # A tibble: 8 × 8
## response term num_df den_df F_value p_value sig interpretation
## <chr> <chr> <dbl> <dbl> <dbl> <chr> <chr> <chr>
## 1 CH4_g_d (Intercept) 1 2716 2.90e+3 <0.001 "***" Statistically…
## 2 CH4_g_d grazing_treatment 1 33 5.30e+0 0.028 "*" Statistically…
## 3 CH4_g_d period_id 4 2716 3.71e+1 <0.001 "***" Statistically…
## 4 CH4_g_d grazing_treatment… 4 2716 5.32e+0 <0.001 "***" Statistically…
## 5 CO2_g_d (Intercept) 1 2716 5.38e+3 <0.001 "***" Statistically…
## 6 CO2_g_d grazing_treatment 1 33 3.53e-1 0.556 "" Not statistic…
## 7 CO2_g_d period_id 4 2716 9.69e+1 <0.001 "***" Statistically…
## 8 CO2_g_d grazing_treatment… 4 2716 2.51e+0 0.040 "*" Statistically…
write_csv(anova_tbl2, "anova_with_interpretation.csv")
library(dplyr)
library(stringr)
anova_tbl2 <- anova_tbl %>%
mutate(
p_num = suppressWarnings(as.numeric(str_replace(p_value, "^<", ""))),
significance_level = case_when(
str_detect(p_value, "^<0\\.001$") ~ "Highly significant",
!is.na(p_num) & p_num < 0.001 ~ "Highly significant",
!is.na(p_num) & p_num < 0.05 ~ "Moderately significant",
!is.na(p_num) & p_num >= 0.05 ~ "Not significant",
TRUE ~ "Check p-value format"
)
) %>%
select(-p_num)
print(anova_tbl2)
## # A tibble: 8 × 8
## response term num_df den_df F_value p_value sig significance_level
## <chr> <chr> <dbl> <dbl> <dbl> <chr> <chr> <chr>
## 1 CH4_g_d (Intercept) 1 2716 2.90e+3 <0.001 "***" Highly significant
## 2 CH4_g_d grazing_treat… 1 33 5.30e+0 0.028 "*" Moderately signif…
## 3 CH4_g_d period_id 4 2716 3.71e+1 <0.001 "***" Highly significant
## 4 CH4_g_d grazing_treat… 4 2716 5.32e+0 <0.001 "***" Highly significant
## 5 CO2_g_d (Intercept) 1 2716 5.38e+3 <0.001 "***" Highly significant
## 6 CO2_g_d grazing_treat… 1 33 3.53e-1 0.556 "" Not significant
## 7 CO2_g_d period_id 4 2716 9.69e+1 <0.001 "***" Highly significant
## 8 CO2_g_d grazing_treat… 4 2716 2.51e+0 0.040 "*" Moderately signif…
#Fit model (AR1)
library(readr)
library(dplyr)
library(nlme)
library(emmeans)
library(openxlsx)
# 1) Find input file
candidates <- c(
"analysis_3SD_14day_15v4bins.csv",
"analysis_3SD_14day_15v4_passed.csv",
"analysis_3SD_14day_passed_only.csv"
)
input_file <- candidates[file.exists(candidates)][1]
if (is.na(input_file)) stop("No input file found.")
cat("Using file:", input_file, "\n")
## Using file: analysis_3SD_14day_passed_only.csv
# 2) Load + prep
dat <- read_csv(input_file, show_col_types = FALSE) %>%
mutate(
cow_id = factor(cow_id),
grazing_treatment = factor(toupper(grazing_treatment), levels = c("K1", "K2")),
period_id = factor(period_id),
CH4_g_d = as.numeric(CH4_g_d),
CO2_g_d = as.numeric(CO2_g_d)
) %>%
group_by(cow_id) %>%
arrange(start_time, .by_group = TRUE) %>%
mutate(obs_index = row_number()) %>%
ungroup() %>%
filter(!is.na(grazing_treatment), !is.na(period_id))
# 3) Fit CH4 AR1
m_ch4 <- lme(
fixed = CH4_g_d ~ grazing_treatment * period_id,
random = ~ 1 | cow_id,
correlation = corAR1(form = ~ obs_index | cow_id),
data = dat,
method = "REML",
na.action = na.omit
)
# 4) Fit CO2 AR1
m_co2 <- lme(
fixed = CO2_g_d ~ grazing_treatment * period_id,
random = ~ 1 | cow_id,
correlation = corAR1(form = ~ obs_index | cow_id),
data = dat,
method = "REML",
na.action = na.omit
)
# 5) EMMs
emm_ch4 <- emmeans(m_ch4, ~ grazing_treatment | period_id)
emm_co2 <- emmeans(m_co2, ~ grazing_treatment | period_id)
emm_ch4_df <- as.data.frame(emm_ch4)
emm_co2_df <- as.data.frame(emm_co2)
# 6) K2 - K1 contrasts per period
ctr_ch4_df <- as.data.frame(contrast(emm_ch4, method = "revpairwise", adjust = "tukey"))
ctr_co2_df <- as.data.frame(contrast(emm_co2, method = "revpairwise", adjust = "tukey"))
# 7) Save
write_csv(emm_ch4_df, "CH4_AR1_emmeans_by_period_treatment.csv")
write_csv(emm_co2_df, "CO2_AR1_emmeans_by_period_treatment.csv")
write_csv(ctr_ch4_df, "CH4_AR1_K2_minus_K1_by_period_tukey.csv")
write_csv(ctr_co2_df, "CO2_AR1_K2_minus_K1_by_period_tukey.csv")
wb <- createWorkbook()
addWorksheet(wb, "CH4_emmeans")
addWorksheet(wb, "CO2_emmeans")
addWorksheet(wb, "CH4_K2_minus_K1")
addWorksheet(wb, "CO2_K2_minus_K1")
writeData(wb, "CH4_emmeans", emm_ch4_df)
writeData(wb, "CO2_emmeans", emm_co2_df)
writeData(wb, "CH4_K2_minus_K1", ctr_ch4_df)
writeData(wb, "CO2_K2_minus_K1", ctr_co2_df)
saveWorkbook(wb, "AR1_emmeans_and_contrasts_step1.xlsx", overwrite = TRUE)
cat("Done. Files saved.\n")
## Done. Files saved.
library(readr)
library(dplyr)
library(ggplot2)
# Read emmeans outputs from Step 1
ch4 <- read_csv("CH4_AR1_emmeans_by_period_treatment.csv", show_col_types = FALSE)
co2 <- read_csv("CO2_AR1_emmeans_by_period_treatment.csv", show_col_types = FALSE)
plot_emm <- function(df, ylab, title, out_png, col_k1 = "forestgreen", col_k2 = "steelblue") {
d <- df %>%
mutate(
period_id = factor(period_id, levels = sort(unique(as.numeric(as.character(period_id))))),
grazing_treatment = factor(grazing_treatment, levels = c("K1", "K2"))
)
p <- ggplot(d, aes(x = period_id, y = emmean, color = grazing_treatment, group = grazing_treatment)) +
geom_line(linewidth = 1.1) +
geom_point(size = 2.8) +
geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.15, linewidth = 0.7) +
scale_color_manual(values = c("K1" = col_k1, "K2" = col_k2)) +
labs(
title = title,
x = "14-day period",
y = ylab,
color = "Treatment"
) +
theme_minimal(base_size = 14) +
theme(
legend.position = "top",
panel.grid.minor = element_blank(),
plot.title = element_text(face = "bold")
)
ggsave(out_png, p, width = 9, height = 6, dpi = 320)
p
}
p_ch4 <- plot_emm(
ch4,
ylab = "Adjusted mean CH4 (g/day)",
title = "CH4 Adjusted Means by Period and Treatment (AR1)",
out_png = "CH4_AR1_interaction_plot.png"
)
p_co2 <- plot_emm(
co2,
ylab = "Adjusted mean CO2 (g/day)",
title = "CO2 Adjusted Means by Period and Treatment (AR1)",
out_png = "CO2_AR1_interaction_plot.png"
)
print(p_ch4)

print(p_co2)

cat("Saved:\n- CH4_AR1_interaction_plot.png\n- CO2_AR1_interaction_plot.png\n")
## Saved:
## - CH4_AR1_interaction_plot.png
## - CO2_AR1_interaction_plot.png
library(readr)
library(dplyr)
library(openxlsx)
# Inputs from Step 1
emm_ch4 <- read_csv("CH4_AR1_emmeans_by_period_treatment.csv", show_col_types = FALSE)
emm_co2 <- read_csv("CO2_AR1_emmeans_by_period_treatment.csv", show_col_types = FALSE)
ctr_ch4 <- read_csv("CH4_AR1_K2_minus_K1_by_period_tukey.csv", show_col_types = FALSE)
ctr_co2 <- read_csv("CO2_AR1_K2_minus_K1_by_period_tukey.csv", show_col_types = FALSE)
make_effect_table <- function(emm_df, ctr_df, response_label) {
k1 <- emm_df %>%
filter(grazing_treatment == "K1") %>%
select(period_id, K1_emmean = emmean)
ci_mult <- qt(0.975, df = ctr_df$df)
ctr_df %>%
left_join(k1, by = "period_id") %>%
mutate(
response = response_label,
lower.CL = estimate - ci_mult * SE,
upper.CL = estimate + ci_mult * SE,
pct_diff_vs_K1 = 100 * estimate / K1_emmean,
sig = case_when(
p.value < 0.001 ~ "***",
p.value < 0.01 ~ "**",
p.value < 0.05 ~ "*",
p.value < 0.10 ~ ".",
TRUE ~ ""
),
significance = case_when(
p.value < 0.001 ~ "Highly significant",
p.value < 0.05 ~ "Moderately significant",
TRUE ~ "Not significant"
)
) %>%
select(response, period_id, contrast, estimate, lower.CL, upper.CL, K1_emmean,
pct_diff_vs_K1, SE, df, p.value, sig, significance) %>%
arrange(period_id)
}
eff_ch4 <- make_effect_table(emm_ch4, ctr_ch4, "CH4_g_d")
eff_co2 <- make_effect_table(emm_co2, ctr_co2, "CO2_g_d")
eff_all <- bind_rows(eff_ch4, eff_co2)
print(eff_all)
## # A tibble: 10 × 13
## response period_id contrast estimate lower.CL upper.CL K1_emmean
## <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 CH4_g_d 1 K2 - K1 47.5 18.0 77.0 336.
## 2 CH4_g_d 2 K2 - K1 30.4 -23.0 83.9 300.
## 3 CH4_g_d 3 K2 - K1 37.1 -3.93 78.1 307.
## 4 CH4_g_d 4 K2 - K1 7.40 -23.2 38.0 333.
## 5 CH4_g_d 5 K2 - K1 59.9 26.6 93.1 272.
## 6 CO2_g_d 1 K2 - K1 259. -412. 930. 11497.
## 7 CO2_g_d 2 K2 - K1 -20.5 -1004. 963. 11081.
## 8 CO2_g_d 3 K2 - K1 135. -679. 949. 10553.
## 9 CO2_g_d 4 K2 - K1 -143. -826. 541. 10748.
## 10 CO2_g_d 5 K2 - K1 565. -151. 1281. 9733.
## # ℹ 6 more variables: pct_diff_vs_K1 <dbl>, SE <dbl>, df <dbl>, p.value <dbl>,
## # sig <chr>, significance <chr>
write_csv(eff_ch4, "CH4_effect_sizes_K2_minus_K1_by_period.csv")
write_csv(eff_co2, "CO2_effect_sizes_K2_minus_K1_by_period.csv")
write_csv(eff_all, "effect_sizes_K2_minus_K1_by_period_all.csv")
wb <- createWorkbook()
addWorksheet(wb, "CH4_effect_sizes")
addWorksheet(wb, "CO2_effect_sizes")
addWorksheet(wb, "All")
writeData(wb, "CH4_effect_sizes", eff_ch4)
writeData(wb, "CO2_effect_sizes", eff_co2)
writeData(wb, "All", eff_all)
saveWorkbook(wb, "effect_sizes_K2_minus_K1_by_period.xlsx", overwrite = TRUE)
# Compute herd CH4 using TOD for K1 vs K2
library(readr)
library(dplyr)
library(openxlsx)
# Inputs (15v4 rule)
visits_file <- "analysis_3SD_with_14day_periods_4hr_bins.csv"
qual_file <- "cow_period_retained_15v_4bins.csv" # cow-periods that passed 15 visits + 4 bins
vis <- read_csv(visits_file, show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = toupper(as.character(grazing_treatment)),
period_id = as.integer(period_id),
CH4_g_d = as.numeric(CH4_g_d),
time_bin_4hr = as.character(time_bin_4hr)
) %>%
filter(is.finite(CH4_g_d), !is.na(time_bin_4hr))
qual <- read_csv(qual_file, show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = toupper(as.character(grazing_treatment)),
period_id = as.integer(period_id)
) %>%
distinct(cow_id, grazing_treatment, period_id)
# Keep only qualified cow-periods
dat <- vis %>%
inner_join(qual, by = c("cow_id", "grazing_treatment", "period_id"))
# TOD method:
# 1) mean CH4 within each 4h bin per cow-period
bin_means <- dat %>%
group_by(cow_id, grazing_treatment, period_id, time_bin_4hr) %>%
summarise(
n_visits_bin = n(),
CH4_bin_mean = mean(CH4_g_d, na.rm = TRUE),
.groups = "drop"
)
# 2) mean of bin means per cow-period (equal weight per bin)
tod_cow_period <- bin_means %>%
group_by(cow_id, grazing_treatment, period_id) %>%
summarise(
n_bins_used = n(),
TOD_CH4_mean_period = mean(CH4_bin_mean, na.rm = TRUE),
.groups = "drop"
)
# 3) overall TOD mean CH4 per cow across its qualified periods
tod_cow_overall <- tod_cow_period %>%
group_by(cow_id, grazing_treatment) %>%
summarise(
n_periods = n(),
TOD_CH4_mean_overall = mean(TOD_CH4_mean_period, na.rm = TRUE),
.groups = "drop"
) %>%
arrange(grazing_treatment, desc(TOD_CH4_mean_overall))
# Optional treatment summary
tod_treatment_summary <- tod_cow_overall %>%
group_by(grazing_treatment) %>%
summarise(
n_cows = n(),
mean_TOD_CH4 = mean(TOD_CH4_mean_overall, na.rm = TRUE),
sd_TOD_CH4 = sd(TOD_CH4_mean_overall, na.rm = TRUE),
.groups = "drop"
)
cat("Qualified unique cows (15v4):", n_distinct(tod_cow_overall$cow_id), "\n")
## Qualified unique cows (15v4): 35
# Save
write_csv(bin_means, "TOD_CH4_bin_means_15v4.csv")
write_csv(tod_cow_period, "TOD_CH4_mean_by_cow_period_15v4.csv")
write_csv(tod_cow_overall, "TOD_CH4_mean_by_cow_overall_15v4.csv")
write_csv(tod_treatment_summary, "TOD_CH4_summary_by_treatment_15v4.csv")
wb <- createWorkbook()
addWorksheet(wb, "Bin_Means")
addWorksheet(wb, "Cow_Period_TOD")
addWorksheet(wb, "Cow_Overall_TOD")
addWorksheet(wb, "Treatment_Summary")
writeData(wb, "Bin_Means", bin_means)
writeData(wb, "Cow_Period_TOD", tod_cow_period)
writeData(wb, "Cow_Overall_TOD", tod_cow_overall)
writeData(wb, "Treatment_Summary", tod_treatment_summary)
saveWorkbook(wb, "TOD_CH4_results_15v4.xlsx", overwrite = TRUE)
print(head(tod_cow_overall, 10))
## # A tibble: 10 × 4
## cow_id grazing_treatment n_periods TOD_CH4_mean_overall
## <chr> <chr> <int> <dbl>
## 1 290G K1 3 372.
## 2 266H K1 4 366.
## 3 364B K1 3 343.
## 4 312E K1 4 342.
## 5 322F K1 3 340.
## 6 454C K1 1 331.
## 7 338J K1 3 328.
## 8 410C K1 3 325.
## 9 314J K1 5 315.
## 10 370H K1 4 309.
print(tod_treatment_summary)
## # A tibble: 2 × 4
## grazing_treatment n_cows mean_TOD_CH4 sd_TOD_CH4
## <chr> <int> <dbl> <dbl>
## 1 K1 19 311. 38.0
## 2 K2 16 339. 37.1
# Estimate CH4 using AM for TOD data
library(readr)
library(dplyr)
library(openxlsx)
# Use the exact same qualified TOD dataset (15v4)
visits_file <- "analysis_3SD_with_14day_periods_4hr_bins.csv"
qual_file <- "cow_period_retained_15v_4bins.csv"
vis <- read_csv(visits_file, show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = toupper(as.character(grazing_treatment)),
period_id = as.integer(period_id),
CH4_g_d = as.numeric(CH4_g_d)
) %>%
filter(is.finite(CH4_g_d))
qual <- read_csv(qual_file, show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = toupper(as.character(grazing_treatment)),
period_id = as.integer(period_id)
) %>%
distinct(cow_id, grazing_treatment, period_id)
# Exact rows used for TOD eligibility
dat_tod <- vis %>%
inner_join(qual, by = c("cow_id", "grazing_treatment", "period_id"))
# Herd-level AM CH4 from exact same rows
herd_am <- dat_tod %>%
summarise(
n_rows = n(),
n_cows = n_distinct(cow_id),
n_periods = n_distinct(period_id),
CH4_mean_AM_herd = mean(CH4_g_d, na.rm = TRUE),
CH4_sd_AM_herd = sd(CH4_g_d, na.rm = TRUE),
CH4_se_AM_herd = CH4_sd_AM_herd / sqrt(n_rows),
CH4_lcl_95 = CH4_mean_AM_herd - 1.96 * CH4_se_AM_herd,
CH4_ucl_95 = CH4_mean_AM_herd + 1.96 * CH4_se_AM_herd
)
# Optional by treatment
herd_am_by_trt <- dat_tod %>%
group_by(grazing_treatment) %>%
summarise(
n_rows = n(),
n_cows = n_distinct(cow_id),
CH4_mean_AM = mean(CH4_g_d, na.rm = TRUE),
CH4_sd_AM = sd(CH4_g_d, na.rm = TRUE),
CH4_se_AM = CH4_sd_AM / sqrt(n_rows),
CH4_lcl_95 = CH4_mean_AM - 1.96 * CH4_se_AM,
CH4_ucl_95 = CH4_mean_AM + 1.96 * CH4_se_AM,
.groups = "drop"
)
print(herd_am)
## # A tibble: 1 × 8
## n_rows n_cows n_periods CH4_mean_AM_herd CH4_sd_AM_herd CH4_se_AM_herd
## <int> <int> <int> <dbl> <dbl> <dbl>
## 1 2759 35 5 326. 89.4 1.70
## # ℹ 2 more variables: CH4_lcl_95 <dbl>, CH4_ucl_95 <dbl>
print(herd_am_by_trt)
## # A tibble: 2 × 8
## grazing_treatment n_rows n_cows CH4_mean_AM CH4_sd_AM CH4_se_AM CH4_lcl_95
## <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 K1 1427 19 311. 87.4 2.31 306.
## 2 K2 1332 16 341. 88.7 2.43 337.
## # ℹ 1 more variable: CH4_ucl_95 <dbl>
write_csv(herd_am, "herd_CH4_AM_from_TOD_exact_rows_15v4.csv")
write_csv(herd_am_by_trt, "herd_CH4_AM_by_treatment_from_TOD_exact_rows_15v4.csv")
wb <- createWorkbook()
addWorksheet(wb, "Herd_AM_Overall")
addWorksheet(wb, "Herd_AM_By_Treatment")
writeData(wb, "Herd_AM_Overall", herd_am)
writeData(wb, "Herd_AM_By_Treatment", herd_am_by_trt)
saveWorkbook(wb, "herd_CH4_AM_from_TOD_exact_rows_15v4.xlsx", overwrite = TRUE)
#Use LSMeans for the same estimation
library(readr)
library(dplyr)
library(lme4)
library(emmeans)
# -----------------------------
# 1) Load exact TOD-qualified 15v4 data used for model
# -----------------------------
vis <- read_csv("analysis_3SD_with_14day_periods_4hr_bins.csv", show_col_types = FALSE) %>%
mutate(
cow_id = factor(cow_id),
grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
period_id = factor(period_id),
CH4_g_d = as.numeric(CH4_g_d)
) %>%
filter(is.finite(CH4_g_d))
qual <- read_csv("cow_period_retained_15v_4bins.csv", show_col_types = FALSE) %>%
mutate(
cow_id = factor(cow_id),
grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
period_id = factor(period_id)
) %>%
distinct(cow_id, grazing_treatment, period_id)
dat <- vis %>%
inner_join(qual, by = c("cow_id","grazing_treatment","period_id")) %>%
droplevels()
# -----------------------------
# 2) Fit model used for treatment LSMeans
# -----------------------------
m_ch4 <- lmer(
CH4_g_d ~ grazing_treatment * period_id + (1 | cow_id),
data = dat,
REML = TRUE
)
# -----------------------------
# 3) Get treatment LSMeans
# -----------------------------
emm_trt <- as.data.frame(emmeans(m_ch4, ~ grazing_treatment))
print(emm_trt)
## grazing_treatment emmean SE df lower.CL upper.CL
## K1 306.4828 8.481591 38.16 289.3151 323.6506
## K2 337.6802 8.892241 32.81 319.5848 355.7756
##
## Results are averaged over the levels of: period_id
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
# -----------------------------
# 4) Add sample numbers (rows + cows) used in model
# -----------------------------
counts_by_trt <- dat %>%
group_by(grazing_treatment) %>%
summarise(
n_rows = n(),
n_cows = n_distinct(cow_id),
.groups = "drop"
)
emm_with_n <- emm_trt %>%
left_join(counts_by_trt, by = "grazing_treatment")
cat("\n=== Sample size used for each treatment LSMean ===\n")
##
## === Sample size used for each treatment LSMean ===
print(emm_with_n)
## grazing_treatment emmean SE df lower.CL upper.CL n_rows n_cows
## K1 306.4828 8.481591 38.16 289.3151 323.6506 1427 19
## K2 337.6802 8.892241 32.81 319.5848 355.7756 1332 16
##
## Results are averaged over the levels of: period_id
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
cat("\n=== Overall sample size used in model ===\n")
##
## === Overall sample size used in model ===
cat("Total rows:", nrow(dat), "\n")
## Total rows: 2759
cat("Total cows:", n_distinct(dat$cow_id), "\n")
## Total cows: 35
cat("Total periods:", n_distinct(dat$period_id), "\n")
## Total periods: 5
# -----------------------------
# 5) Optional: period-wise counts
# -----------------------------
counts_trt_period <- dat %>%
group_by(grazing_treatment, period_id) %>%
summarise(
n_rows = n(),
n_cows = n_distinct(cow_id),
.groups = "drop"
)
cat("\n=== Counts by treatment x period ===\n")
##
## === Counts by treatment x period ===
print(counts_trt_period)
## # A tibble: 10 × 4
## grazing_treatment period_id n_rows n_cows
## <fct> <fct> <int> <int>
## 1 K1 1 354 16
## 2 K1 2 54 3
## 3 K1 3 162 9
## 4 K1 4 521 18
## 5 K1 5 336 16
## 6 K2 1 309 11
## 7 K2 2 184 10
## 8 K2 3 306 14
## 9 K2 4 298 15
## 10 K2 5 235 12
# -----------------------------
# 6) Save outputs
# -----------------------------
readr::write_csv(emm_with_n, "CH4_treatment_lsmeans_with_sample_sizes_15v4.csv")
readr::write_csv(counts_trt_period, "CH4_counts_by_treatment_period_15v4.csv")
# Compare the three methods
library(readr)
library(dplyr)
library(nlme)
library(emmeans)
library(tidyr)
library(openxlsx)
# -----------------------------
# Inputs
# -----------------------------
visits_file <- "analysis_3SD_with_14day_periods_4hr_bins.csv"
qual_file <- "cow_period_retained_15v_4bins.csv"
# -----------------------------
# Load and align exact analysis rows
# -----------------------------
vis <- read_csv(visits_file, show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = toupper(as.character(grazing_treatment)),
period_id = as.integer(period_id),
CH4_g_d = as.numeric(CH4_g_d),
time_bin_4hr = as.character(time_bin_4hr)
) %>%
filter(is.finite(CH4_g_d), !is.na(grazing_treatment), !is.na(period_id))
qual <- read_csv(qual_file, show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = toupper(as.character(grazing_treatment)),
period_id = as.integer(period_id)
) %>%
distinct(cow_id, grazing_treatment, period_id)
dat <- vis %>%
inner_join(qual, by = c("cow_id", "grazing_treatment", "period_id")) %>%
mutate(
grazing_treatment = factor(grazing_treatment, levels = c("K1","K2")),
period_id = factor(period_id)
) %>%
group_by(cow_id) %>%
arrange(start_time, .by_group = TRUE) %>%
mutate(obs_index = row_number()) %>%
ungroup()
# -----------------------------
# Method 1: AM (visit-level arithmetic mean)
# -----------------------------
am_by_trt <- dat %>%
group_by(grazing_treatment) %>%
summarise(
n_rows = n(),
n_cows = n_distinct(cow_id),
CH4_mean = mean(CH4_g_d, na.rm = TRUE),
CH4_sd = sd(CH4_g_d, na.rm = TRUE),
CH4_se = CH4_sd / sqrt(n_rows),
CH4_lcl_95 = CH4_mean - 1.96 * CH4_se,
CH4_ucl_95 = CH4_mean + 1.96 * CH4_se,
.groups = "drop"
) %>%
mutate(method = "AM") %>%
select(method, grazing_treatment, n_cows, n_rows, CH4_mean, CH4_se, CH4_lcl_95, CH4_ucl_95)
# -----------------------------
# Method 2: TOD (bin means then average)
# -----------------------------
tod_by_trt <- dat %>%
filter(!is.na(time_bin_4hr)) %>%
group_by(cow_id, grazing_treatment, period_id, time_bin_4hr) %>%
summarise(ch4_bin_mean = mean(CH4_g_d, na.rm = TRUE), .groups = "drop") %>%
group_by(cow_id, grazing_treatment, period_id) %>%
summarise(ch4_tod_period = mean(ch4_bin_mean, na.rm = TRUE), .groups = "drop") %>%
group_by(grazing_treatment) %>%
summarise(
n_rows = n(), # cow-period rows
n_cows = n_distinct(cow_id),
CH4_mean = mean(ch4_tod_period, na.rm = TRUE),
CH4_sd = sd(ch4_tod_period, na.rm = TRUE),
CH4_se = CH4_sd / sqrt(n_rows),
CH4_lcl_95 = CH4_mean - 1.96 * CH4_se,
CH4_ucl_95 = CH4_mean + 1.96 * CH4_se,
.groups = "drop"
) %>%
mutate(method = "TOD") %>%
select(method, grazing_treatment, n_cows, n_rows, CH4_mean, CH4_se, CH4_lcl_95, CH4_ucl_95)
# -----------------------------
# Method 3: LSMeans (AR1 model)
# -----------------------------
m_ar1 <- lme(
CH4_g_d ~ grazing_treatment * period_id,
random = ~ 1 | cow_id,
correlation = corAR1(form = ~ obs_index | cow_id),
data = dat,
method = "REML",
na.action = na.omit
)
counts_by_trt <- dat %>%
group_by(grazing_treatment) %>%
summarise(
n_cows = n_distinct(cow_id),
n_rows = n(),
.groups = "drop"
)
lsm_by_trt <- as.data.frame(emmeans(m_ar1, ~ grazing_treatment)) %>%
mutate(grazing_treatment = as.character(grazing_treatment)) %>%
left_join(
counts_by_trt %>% mutate(grazing_treatment = as.character(grazing_treatment)),
by = "grazing_treatment"
) %>%
transmute(
method = "LSMeans_AR1",
grazing_treatment,
n_cows,
n_rows,
CH4_mean = emmean,
CH4_se = SE,
CH4_lcl_95 = lower.CL,
CH4_ucl_95 = upper.CL
)
# -----------------------------
# Combine + comparison
# -----------------------------
method_table <- bind_rows(am_by_trt, tod_by_trt, lsm_by_trt) %>%
arrange(grazing_treatment, method)
overall_table <- method_table %>%
group_by(method) %>%
summarise(
CH4_mean_overall = mean(CH4_mean, na.rm = TRUE),
.groups = "drop"
) %>%
mutate(
diff_vs_AM = CH4_mean_overall - CH4_mean_overall[method == "AM"],
pct_vs_AM = 100 * diff_vs_AM / CH4_mean_overall[method == "AM"]
)
wide_compare <- method_table %>%
select(method, grazing_treatment, CH4_mean) %>%
pivot_wider(names_from = method, values_from = CH4_mean) %>%
mutate(
AM_minus_TOD = AM - TOD,
AM_minus_LSM = AM - LSMeans_AR1,
TOD_minus_LSM = TOD - LSMeans_AR1
)
print(method_table)
## # A tibble: 6 × 8
## method grazing_treatment n_cows n_rows CH4_mean CH4_se CH4_lcl_95 CH4_ucl_95
## <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 AM K1 19 1427 311. 2.31 306. 315.
## 2 LSMeans… K1 19 1427 306. 8.56 289. 324.
## 3 TOD K1 19 62 311. 5.79 299. 322.
## 4 AM K2 16 1332 341. 2.43 337. 346.
## 5 LSMeans… K2 16 1332 338. 8.90 320. 356.
## 6 TOD K2 16 62 340. 5.55 330. 351.
print(overall_table)
## # A tibble: 3 × 4
## method CH4_mean_overall diff_vs_AM pct_vs_AM
## <chr> <dbl> <dbl> <dbl>
## 1 AM 326. 0 0
## 2 LSMeans_AR1 322. -3.96 -1.21
## 3 TOD 326. -0.531 -0.163
print(wide_compare)
## # A tibble: 2 × 7
## grazing_treatment AM LSMeans_AR1 TOD AM_minus_TOD AM_minus_LSM
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 K1 311. 306. 311. 0.0558 4.27
## 2 K2 341. 338. 340. 1.01 3.64
## # ℹ 1 more variable: TOD_minus_LSM <dbl>
# -----------------------------
# Save
# -----------------------------
write_csv(method_table, "CH4_method_comparison_by_treatment_15v4.csv")
write_csv(overall_table, "CH4_method_comparison_overall_15v4.csv")
write_csv(wide_compare, "CH4_method_pairwise_differences_15v4.csv")
wb <- createWorkbook()
addWorksheet(wb, "By_Treatment")
addWorksheet(wb, "Overall")
addWorksheet(wb, "Pairwise_Diff")
writeData(wb, "By_Treatment", method_table)
writeData(wb, "Overall", overall_table)
writeData(wb, "Pairwise_Diff", wide_compare)
saveWorkbook(wb, "CH4_three_method_comparison_15v4.xlsx", overwrite = TRUE)
# Computes Pearson, Spearman, RMSE, Bland-Altman, and rank-shift:
library(readr)
library(dplyr)
# -----------------------------
# Load exact 15v4-qualified rows
# -----------------------------
vis <- read_csv("analysis_3SD_with_14day_periods_4hr_bins.csv", show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = toupper(as.character(grazing_treatment)),
period_id = as.integer(period_id),
CH4_g_d = as.numeric(CH4_g_d),
time_bin_4hr = as.character(time_bin_4hr)
) %>%
filter(is.finite(CH4_g_d))
qual <- read_csv("cow_period_retained_15v_4bins.csv", show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = toupper(as.character(grazing_treatment)),
period_id = as.integer(period_id)
) %>%
distinct(cow_id, grazing_treatment, period_id)
dat <- vis %>%
inner_join(qual, by = c("cow_id", "grazing_treatment", "period_id"))
# -----------------------------
# AM cow means
# -----------------------------
cow_am <- dat %>%
group_by(cow_id, grazing_treatment) %>%
summarise(CH4_AM = mean(CH4_g_d, na.rm = TRUE), .groups = "drop")
# -----------------------------
# TOD cow means
# -----------------------------
cow_tod <- dat %>%
filter(!is.na(time_bin_4hr)) %>%
group_by(cow_id, grazing_treatment, period_id, time_bin_4hr) %>%
summarise(ch4_bin_mean = mean(CH4_g_d, na.rm = TRUE), .groups = "drop") %>%
group_by(cow_id, grazing_treatment, period_id) %>%
summarise(ch4_tod_period = mean(ch4_bin_mean, na.rm = TRUE), .groups = "drop") %>%
group_by(cow_id, grazing_treatment) %>%
summarise(CH4_TOD = mean(ch4_tod_period, na.rm = TRUE), .groups = "drop")
# -----------------------------
# LSMeans-like cow adjusted means
# -----------------------------
cow_lsm <- read_csv("CH4_adjusted_means_by_cow_15v4.csv", show_col_types = FALSE) %>%
transmute(
cow_id = as.character(cow_id),
grazing_treatment = toupper(as.character(grazing_treatment)),
CH4_LSM = as.numeric(CH4_adjusted_mean_conditional)
)
# -----------------------------
# Merge
# -----------------------------
cow_all <- cow_am %>%
inner_join(cow_tod, by = c("cow_id", "grazing_treatment")) %>%
inner_join(cow_lsm, by = c("cow_id", "grazing_treatment"))
# -----------------------------
# Pairwise agreement stats
# -----------------------------
pair_stats <- function(df, x, y, label) {
xv <- df[[x]]
yv <- df[[y]]
d <- xv - yv
data.frame(
comparison = label,
n = length(xv),
pearson_r = cor(xv, yv, method = "pearson"),
pearson_p = cor.test(xv, yv, method = "pearson")$p.value,
spearman_rho = cor(xv, yv, method = "spearman"),
spearman_p = cor.test(xv, yv, method = "spearman")$p.value,
mean_diff = mean(d),
sd_diff = sd(d),
rmse = sqrt(mean(d^2)),
loa_low = mean(d) - 1.96 * sd(d),
loa_high = mean(d) + 1.96 * sd(d)
)
}
stats_all <- bind_rows(
pair_stats(cow_all, "CH4_AM", "CH4_TOD", "AM vs TOD"),
pair_stats(cow_all, "CH4_AM", "CH4_LSM", "AM vs LSMeans"),
pair_stats(cow_all, "CH4_TOD", "CH4_LSM", "TOD vs LSMeans")
)
# -----------------------------
# Rank shifts
# -----------------------------
rank_tbl <- cow_all %>%
mutate(
rank_AM = rank(-CH4_AM, ties.method = "average"),
rank_TOD = rank(-CH4_TOD, ties.method = "average"),
rank_LSM = rank(-CH4_LSM, ties.method = "average"),
shift_AM_TOD = rank_AM - rank_TOD,
shift_AM_LSM = rank_AM - rank_LSM,
shift_TOD_LSM = rank_TOD - rank_LSM
) %>%
arrange(rank_AM)
print(stats_all)
## comparison n pearson_r pearson_p spearman_rho spearman_p mean_diff
## 1 AM vs TOD 35 0.9837427 3.442751e-26 0.9675070 0 -0.08970823
## 2 AM vs LSMeans 35 0.9982950 2.657313e-42 0.9969188 0 -0.19600603
## 3 TOD vs LSMeans 35 0.9827106 9.433727e-26 0.9705882 0 -0.10629780
## sd_diff rmse loa_low loa_high
## 1 7.122566 7.020651 -14.049938 13.87052
## 2 3.279855 3.238597 -6.624522 6.23251
## 3 7.886157 7.773408 -15.563165 15.35057
print(head(rank_tbl, 10))
## # A tibble: 10 × 11
## cow_id grazing_treatment CH4_AM CH4_TOD CH4_LSM rank_AM rank_TOD rank_LSM
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 398G K2 399. 405. 392. 1 1 1
## 2 252H K2 385. 393. 383. 2 2 2
## 3 270G K2 382. 383. 380. 3 3 3
## 4 368J K2 370. 370. 368. 4 5 4
## 5 296H K2 364. 358. 363. 5 7 5
## 6 266H K1 359. 366. 356. 6 6 6
## 7 290G K1 359. 372. 356. 7 4 7
## 8 266D K2 356. 357. 356. 8 8 8
## 9 332H K2 353. 347. 352. 9 9 9
## 10 364B K1 348. 343. 346. 10 10 10
## # ℹ 3 more variables: shift_AM_TOD <dbl>, shift_AM_LSM <dbl>,
## # shift_TOD_LSM <dbl>
write_csv(cow_all, "CH4_cow_level_AM_TOD_LSM_15v4.csv")
write_csv(stats_all, "CH4_method_agreement_stats_15v4.csv")
write_csv(rank_tbl, "CH4_method_rank_shifts_15v4.csv")
#Post-diagnostic check for LSmeans model framework
library(readr)
library(dplyr)
library(nlme)
# -----------------------------
# Load exact 15v4-qualified data
# -----------------------------
vis <- read_csv("analysis_3SD_with_14day_periods_4hr_bins.csv", show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
period_id = factor(as.integer(period_id)),
CH4_g_d = as.numeric(CH4_g_d),
CO2_g_d = as.numeric(CO2_g_d)
)
qual <- read_csv("cow_period_retained_15v_4bins.csv", show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
period_id = factor(as.integer(period_id))
) %>%
distinct(cow_id, grazing_treatment, period_id)
dat <- vis %>%
inner_join(qual, by = c("cow_id","grazing_treatment","period_id")) %>%
filter(is.finite(CH4_g_d), is.finite(CO2_g_d)) %>%
group_by(cow_id) %>%
arrange(start_time, .by_group = TRUE) %>%
mutate(obs_index = row_number()) %>%
ungroup()
fit_diag <- function(df, yvar, prefix) {
fml <- as.formula(paste0(yvar, " ~ grazing_treatment * period_id"))
# Main selected model (AR1 + random intercept cow)
m <- lme(
fixed = fml,
random = ~1 | cow_id,
correlation = corAR1(form = ~ obs_index | cow_id),
data = df,
method = "REML",
na.action = na.omit
)
# For heterogeneity test, compare constant vs varIdent using ML
m_const_ml <- update(m, method = "ML")
m_het_ml <- update(m_const_ml, weights = varIdent(form = ~1 | grazing_treatment))
lrt <- anova(m_const_ml, m_het_ml)
# Residuals/fitted
r <- residuals(m, type = "normalized")
f <- fitted(m)
# Shapiro (sample if needed)
set.seed(42)
r_sub <- if (length(r) > 5000) sample(r, 5000) else r
sh <- shapiro.test(r_sub)
# Outlier count using |std resid| > 3
n_out3 <- sum(abs(r) > 3, na.rm = TRUE)
# Save plots
png(paste0(prefix, "_postdiag_residual_plots.png"), width = 1400, height = 1000, res = 130)
par(mfrow = c(2,2))
hist(r, breaks = "FD", col = "#4C78A8", border = "white",
main = paste(prefix, "Residual Histogram"), xlab = "Normalized residual")
qqnorm(r, pch = 16, cex = 0.5, main = paste(prefix, "Residual QQ Plot"))
qqline(r, col = "red", lwd = 2)
plot(f, r, pch = 16, cex = 0.5, col = "#2E7D32",
main = paste(prefix, "Residuals vs Fitted"), xlab = "Fitted", ylab = "Normalized residual")
abline(h = 0, col = "red", lwd = 2)
plot(f, sqrt(abs(r)), pch = 16, cex = 0.5, col = "#1565C0",
main = paste(prefix, "Scale-Location"), xlab = "Fitted", ylab = "sqrt(|normalized residual|)")
dev.off()
# Residual autocorrelation summary (lag 1)
ac <- ACF(m, resType = "normalized")
ac_df <- as.data.frame(ac)
lag1 <- ac_df$ACF[ac_df$lag == 1][1]
# Output table
out <- data.frame(
response = yvar,
n_obs = length(r),
shapiro_W = unname(sh$statistic),
shapiro_p = sh$p.value,
normality_flag = ifelse(sh$p.value > 0.05, "Normal", "Non-normal"),
n_abs_std_resid_gt3 = n_out3,
ar1_phi = coef(m$modelStruct$corStruct, unconstrained = FALSE),
resid_acf_lag1 = lag1,
AIC_const = AIC(m_const_ml),
AIC_het = AIC(m_het_ml),
LRT = as.numeric(lrt$L.Ratio[2]),
p_hetero = as.numeric(lrt$`p-value`[2]),
hetero_flag = ifelse(as.numeric(lrt$`p-value`[2]) < 0.05, "Heterogeneous variance", "No strong heterogeneity")
)
list(model = m, summary = out)
}
ch4 <- fit_diag(dat, "CH4_g_d", "CH4_AR1_15v4")
co2 <- fit_diag(dat, "CO2_g_d", "CO2_AR1_15v4")
diag_tbl <- bind_rows(ch4$summary, co2$summary)
print(diag_tbl)
## response n_obs shapiro_W shapiro_p normality_flag
## Phi...1 CH4_g_d 2759 0.9974874 0.0001848912 Non-normal
## Phi...2 CO2_g_d 2759 0.9985740 0.0177185337 Non-normal
## n_abs_std_resid_gt3 ar1_phi resid_acf_lag1 AIC_const AIC_het
## Phi...1 12 0.1057703 -0.02140034 32018.07 32017.72
## Phi...2 9 0.1414392 -0.02781654 47217.70 47219.70
## LRT p_hetero hetero_flag
## Phi...1 2.356965558 0.1247246 No strong heterogeneity
## Phi...2 0.005462756 0.9410816 No strong heterogeneity
write_csv(diag_tbl, "LSMeans_AR1_postdiagnostic_summary_15v4.csv")
#Plot residual plot:
library(readr)
library(dplyr)
library(nlme)
# -----------------------------
# Load exact 15v4-qualified data
# -----------------------------
vis <- read_csv("analysis_3SD_with_14day_periods_4hr_bins.csv", show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
period_id = factor(as.integer(period_id)),
CH4_g_d = as.numeric(CH4_g_d),
CO2_g_d = as.numeric(CO2_g_d)
)
qual <- read_csv("cow_period_retained_15v_4bins.csv", show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
period_id = factor(as.integer(period_id))
) %>%
distinct(cow_id, grazing_treatment, period_id)
dat <- vis %>%
inner_join(qual, by = c("cow_id","grazing_treatment","period_id")) %>%
filter(is.finite(CH4_g_d), is.finite(CO2_g_d)) %>%
group_by(cow_id) %>%
arrange(start_time, .by_group = TRUE) %>%
mutate(obs_index = row_number()) %>%
ungroup()
# -----------------------------
# Fit AR1 models
# -----------------------------
m_ch4 <- lme(
CH4_g_d ~ grazing_treatment * period_id,
random = ~1 | cow_id,
correlation = corAR1(form = ~obs_index | cow_id),
data = dat,
method = "REML",
na.action = na.omit
)
m_co2 <- lme(
CO2_g_d ~ grazing_treatment * period_id,
random = ~1 | cow_id,
correlation = corAR1(form = ~obs_index | cow_id),
data = dat,
method = "REML",
na.action = na.omit
)
# -----------------------------
# Residual plot helper
# -----------------------------
make_resid_plots <- function(model, prefix, color_pt = "#2E7D32") {
r <- residuals(model, type = "normalized")
f <- fitted(model)
png(paste0(prefix, "_residual_diagnostics.png"), width = 1400, height = 1000, res = 140)
par(mfrow = c(2,2))
hist(r, breaks = "FD", col = "#4C78A8", border = "white",
main = paste(prefix, "Residual Histogram"),
xlab = "Normalized residual")
qqnorm(r, pch = 16, cex = 0.55, main = paste(prefix, "QQ Plot"))
qqline(r, col = "red", lwd = 2)
plot(f, r, pch = 16, cex = 0.5, col = color_pt,
main = paste(prefix, "Residuals vs Fitted"),
xlab = "Fitted", ylab = "Normalized residual")
abline(h = 0, col = "red", lwd = 2)
plot(f, sqrt(abs(r)), pch = 16, cex = 0.5, col = "#1565C0",
main = paste(prefix, "Scale-Location"),
xlab = "Fitted", ylab = "sqrt(|normalized residual|)")
dev.off()
}
make_resid_plots(m_ch4, "CH4_AR1_15v4", color_pt = "forestgreen")
## quartz_off_screen
## 2
make_resid_plots(m_co2, "CO2_AR1_15v4", color_pt = "steelblue")
## quartz_off_screen
## 2
cat("Saved:\n",
"- CH4_AR1_15v4_residual_diagnostics.png\n",
"- CO2_AR1_15v4_residual_diagnostics.png\n")
## Saved:
## - CH4_AR1_15v4_residual_diagnostics.png
## - CO2_AR1_15v4_residual_diagnostics.png
#Use BoxCox to validate transformation
library(readr)
library(dplyr)
library(nlme)
# -----------------------------
# Load exact 15v4-qualified data
# -----------------------------
vis <- read_csv("analysis_3SD_with_14day_periods_4hr_bins.csv", show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
period_id = factor(as.integer(period_id)),
CH4_g_d = as.numeric(CH4_g_d),
CO2_g_d = as.numeric(CO2_g_d)
)
qual <- read_csv("cow_period_retained_15v_4bins.csv", show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
period_id = factor(as.integer(period_id))
) %>%
distinct(cow_id, grazing_treatment, period_id)
dat <- vis %>%
inner_join(qual, by = c("cow_id","grazing_treatment","period_id")) %>%
filter(is.finite(CH4_g_d), is.finite(CO2_g_d)) %>%
group_by(cow_id) %>%
arrange(start_time, .by_group = TRUE) %>%
mutate(obs_index = row_number()) %>%
ungroup()
# -----------------------------
# Box-Cox profiling for AR1 mixed model
# -----------------------------
boxcox_transform <- function(y, lambda) {
if (abs(lambda) < 1e-8) log(y) else (y^lambda - 1) / lambda
}
profile_boxcox_ar1 <- function(df, response, lambdas = seq(-1, 2, by = 0.02), prefix = "CH4") {
y <- df[[response]]
if (any(!is.finite(y)) || any(y <= 0)) {
stop(response, " has non-positive or non-finite values; Box-Cox requires y > 0.")
}
sum_log_y <- sum(log(y))
ll <- rep(NA_real_, length(lambdas))
for (i in seq_along(lambdas)) {
lam <- lambdas[i]
yt <- boxcox_transform(y, lam)
d <- df
d$y_bc <- yt
fit <- try(
lme(
fixed = y_bc ~ grazing_treatment * period_id,
random = ~1 | cow_id,
correlation = corAR1(form = ~obs_index | cow_id),
data = d,
method = "ML",
na.action = na.omit,
control = lmeControl(msMaxIter = 100, msMaxEval = 150)
),
silent = TRUE
)
if (!inherits(fit, "try-error")) {
# Box-Cox profile likelihood correction term
ll[i] <- as.numeric(logLik(fit)) + (lam - 1) * sum_log_y
}
}
ok <- is.finite(ll)
lam_ok <- lambdas[ok]
ll_ok <- ll[ok]
lam_hat <- lam_ok[which.max(ll_ok)]
ll_max <- max(ll_ok)
# 95% CI by profile likelihood drop: 0.5 * qchisq(0.95, 1)
cutoff <- ll_max - 0.5 * qchisq(0.95, df = 1)
ci_vals <- lam_ok[ll_ok >= cutoff]
ci_low <- min(ci_vals)
ci_high <- max(ci_vals)
res <- tibble(
response = response,
n = length(y),
lambda_hat = lam_hat,
lambda_ci_low = ci_low,
lambda_ci_high = ci_high,
lambda_ci_includes_1 = (ci_low <= 1 && ci_high >= 1),
transform_required = ifelse(ci_low <= 1 && ci_high >= 1, "No", "Yes"),
suggested_transform = case_when(
abs(lam_hat) < 0.15 ~ "Log transform",
TRUE ~ paste0("Box-Cox with lambda=", round(lam_hat, 2))
)
)
# Save profile for plotting
prof <- tibble(lambda = lam_ok, profile_logLik = ll_ok)
write_csv(prof, paste0(prefix, "_boxcox_profile_AR1_15v4.csv"))
# Plot profile
png(paste0(prefix, "_boxcox_profile_AR1_15v4.png"), width = 1000, height = 700, res = 130)
plot(lam_ok, ll_ok, type = "l", lwd = 2,
xlab = expression(lambda), ylab = "Profile log-likelihood",
main = paste0(prefix, " Box-Cox Profile (AR1 mixed model, 15v4)"))
abline(v = lam_hat, col = "red", lwd = 2, lty = 2)
abline(v = c(ci_low, ci_high), col = "blue", lwd = 2, lty = 3)
abline(v = 1, col = "darkgreen", lwd = 2, lty = 3)
legend("bottomright",
legend = c(
paste0("lambda_hat=", round(lam_hat, 2)),
paste0("95% CI=[", round(ci_low, 2), ", ", round(ci_high, 2), "]"),
"lambda=1 (no transform)"
),
lty = c(2, 3, 3), lwd = 2, col = c("red", "blue", "darkgreen"), bty = "n")
dev.off()
res
}
res_ch4 <- profile_boxcox_ar1(dat, "CH4_g_d", prefix = "CH4")
res_co2 <- profile_boxcox_ar1(dat, "CO2_g_d", prefix = "CO2")
boxcox_summary <- bind_rows(res_ch4, res_co2)
print(boxcox_summary)
## # A tibble: 2 × 8
## response n lambda_hat lambda_ci_low lambda_ci_high lambda_ci_includes_1
## <chr> <int> <dbl> <dbl> <dbl> <lgl>
## 1 CH4_g_d 2759 0.86 0.78 0.94 FALSE
## 2 CO2_g_d 2759 0.62 0.44 0.8 FALSE
## # ℹ 2 more variables: transform_required <chr>, suggested_transform <chr>
write_csv(boxcox_summary, "boxcox_transform_decision_AR1_15v4.csv")
# Check if trasnformation improves model:
library(readr)
library(dplyr)
library(nlme)
library(emmeans)
library(openxlsx)
# -----------------------------
# Data (same 15v4 analysis set)
# -----------------------------
vis <- read_csv("analysis_3SD_with_14day_periods_4hr_bins.csv", show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
period_id = factor(as.integer(period_id)),
CH4_g_d = as.numeric(CH4_g_d),
CO2_g_d = as.numeric(CO2_g_d)
)
qual <- read_csv("cow_period_retained_15v_4bins.csv", show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
period_id = factor(as.integer(period_id))
) %>%
distinct(cow_id, grazing_treatment, period_id)
dat <- vis %>%
inner_join(qual, by = c("cow_id","grazing_treatment","period_id")) %>%
filter(is.finite(CH4_g_d), is.finite(CO2_g_d)) %>%
group_by(cow_id) %>%
arrange(start_time, .by_group = TRUE) %>%
mutate(obs_index = row_number()) %>%
ungroup()
# -----------------------------
# Box-Cox transform function
# -----------------------------
bc <- function(y, lambda) {
if (abs(lambda) < 1e-8) log(y) else (y^lambda - 1) / lambda
}
# Lambdas from your Box-Cox profiling
lam_ch4 <- 0.86
lam_co2 <- 0.62
dat <- dat %>%
mutate(
CH4_bc = bc(CH4_g_d, lam_ch4),
CO2_bc = bc(CO2_g_d, lam_co2)
)
# -----------------------------
# Fit original and transformed models
# -----------------------------
fit_ar1 <- function(response, data) {
lme(
fixed = as.formula(paste0(response, " ~ grazing_treatment * period_id")),
random = ~1 | cow_id,
correlation = corAR1(form = ~ obs_index | cow_id),
data = data,
method = "REML",
na.action = na.omit
)
}
m_ch4_raw <- fit_ar1("CH4_g_d", dat)
m_ch4_bc <- fit_ar1("CH4_bc", dat)
m_co2_raw <- fit_ar1("CO2_g_d", dat)
m_co2_bc <- fit_ar1("CO2_bc", dat)
# -----------------------------
# Compare fixed effects conclusions
# -----------------------------
anova_tbl <- bind_rows(
as.data.frame(anova(m_ch4_raw)) %>% mutate(model = "CH4_raw"),
as.data.frame(anova(m_ch4_bc)) %>% mutate(model = "CH4_boxcox"),
as.data.frame(anova(m_co2_raw)) %>% mutate(model = "CO2_raw"),
as.data.frame(anova(m_co2_bc)) %>% mutate(model = "CO2_boxcox"),
.id = "term"
)
# -----------------------------
# Compare K2-K1 by period (same contrasts)
# -----------------------------
get_ctr <- function(mod, label) {
as.data.frame(
contrast(emmeans(mod, ~ grazing_treatment | period_id), method = "revpairwise", adjust = "tukey")
) %>% mutate(model = label)
}
ctr_tbl <- bind_rows(
get_ctr(m_ch4_raw, "CH4_raw"),
get_ctr(m_ch4_bc, "CH4_boxcox"),
get_ctr(m_co2_raw, "CO2_raw"),
get_ctr(m_co2_bc, "CO2_boxcox")
)
# -----------------------------
# Residual diagnostics summary
# -----------------------------
diag_one <- function(mod, label) {
r <- residuals(mod, type = "normalized")
set.seed(42)
r_sub <- if (length(r) > 5000) sample(r, 5000) else r
sh <- shapiro.test(r_sub)
data.frame(
model = label,
n = length(r),
shapiro_W = unname(sh$statistic),
shapiro_p = sh$p.value,
normality_flag = ifelse(sh$p.value > 0.05, "Normal", "Non-normal"),
n_abs_std_resid_gt3 = sum(abs(r) > 3, na.rm = TRUE),
AIC = AIC(mod),
BIC = BIC(mod)
)
}
diag_tbl <- bind_rows(
diag_one(m_ch4_raw, "CH4_raw"),
diag_one(m_ch4_bc, "CH4_boxcox"),
diag_one(m_co2_raw, "CO2_raw"),
diag_one(m_co2_bc, "CO2_boxcox")
)
# -----------------------------
# Quick robustness flags
# -----------------------------
sig_terms <- function(mod) {
a <- as.data.frame(anova(mod))
tibble(
treatment_sig = a["grazing_treatment","p-value"] < 0.05,
period_sig = a["period_id","p-value"] < 0.05,
interaction_sig = a["grazing_treatment:period_id","p-value"] < 0.05
)
}
robust_tbl <- bind_rows(
bind_cols(response = "CH4", scale = "raw", sig_terms(m_ch4_raw)),
bind_cols(response = "CH4", scale = "boxcox", sig_terms(m_ch4_bc)),
bind_cols(response = "CO2", scale = "raw", sig_terms(m_co2_raw)),
bind_cols(response = "CO2", scale = "boxcox", sig_terms(m_co2_bc))
)
print(diag_tbl)
## model n shapiro_W shapiro_p normality_flag n_abs_std_resid_gt3
## 1 CH4_raw 2759 0.9974874 0.0001848912 Non-normal 12
## 2 CH4_boxcox 2759 0.9978276 0.0007202216 Non-normal 18
## 3 CO2_raw 2759 0.9985740 0.0177185337 Non-normal 9
## 4 CO2_boxcox 2759 0.9993079 0.4038794757 Normal 10
## AIC BIC
## 1 31959.56 32036.51
## 2 27529.80 27606.75
## 3 47101.83 47178.78
## 4 27740.92 27817.86
print(robust_tbl)
## # A tibble: 4 × 5
## response scale treatment_sig period_sig interaction_sig
## <chr> <chr> <lgl> <lgl> <lgl>
## 1 CH4 raw TRUE TRUE TRUE
## 2 CH4 boxcox TRUE TRUE TRUE
## 3 CO2 raw FALSE TRUE TRUE
## 4 CO2 boxcox FALSE TRUE TRUE
# -----------------------------
# Save all outputs
# -----------------------------
write_csv(anova_tbl, "sensitivity_anova_raw_vs_boxcox.csv")
write_csv(ctr_tbl, "sensitivity_contrasts_raw_vs_boxcox.csv")
write_csv(diag_tbl, "sensitivity_diagnostics_raw_vs_boxcox.csv")
write_csv(robust_tbl, "sensitivity_robustness_flags_raw_vs_boxcox.csv")
wb <- createWorkbook()
addWorksheet(wb, "ANOVA")
addWorksheet(wb, "Contrasts")
addWorksheet(wb, "Diagnostics")
addWorksheet(wb, "Robustness_Flags")
writeData(wb, "ANOVA", anova_tbl)
writeData(wb, "Contrasts", ctr_tbl)
writeData(wb, "Diagnostics", diag_tbl)
writeData(wb, "Robustness_Flags", robust_tbl)
saveWorkbook(wb, "sensitivity_raw_vs_boxcox_AR1_15v4.xlsx", overwrite = TRUE)
# Add weight and age as covariates to test grazing effects:
library(readr)
library(readxl)
library(dplyr)
library(nlme)
library(emmeans)
library(openxlsx)
# -----------------------------
# Paths
# -----------------------------
base_dir <- "/Users/dolapo/Library/CloudStorage/GoogleDrive-daadepoj@ualberta.ca/Other computers/Dell/Academics/Python/ALES_Analysis/ALES_codex"
work_dir <- file.path(base_dir, "Feb13_emission_analysis")
# -----------------------------
# 1) Load 15v4 analysis data
# -----------------------------
vis <- read_csv(file.path(work_dir, "analysis_3SD_with_14day_periods_4hr_bins.csv"), show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
period_id = factor(as.integer(period_id)),
CH4_g_d = as.numeric(CH4_g_d),
CO2_g_d = as.numeric(CO2_g_d)
)
qual <- read_csv(file.path(work_dir, "cow_period_retained_15v_4bins.csv"), show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
period_id = factor(as.integer(period_id))
) %>%
distinct(cow_id, grazing_treatment, period_id)
dat <- vis %>%
inner_join(qual, by = c("cow_id","grazing_treatment","period_id")) %>%
filter(is.finite(CH4_g_d), is.finite(CO2_g_d)) %>%
group_by(cow_id) %>%
arrange(start_time, .by_group = TRUE) %>%
mutate(obs_index = row_number()) %>%
ungroup()
# -----------------------------
# 2) Load cow-level age and weight
# -----------------------------
age_tbl <- read_excel(file.path(base_dir, "K1_K2_Cows_With_Age.xlsx"), sheet = "K1_K2_Age_Data") %>%
transmute(
cow_id = as.character(cow_id),
grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
age_days = as.numeric(cow_age_in_days),
age_years = as.numeric(cow_age_july_2025)
)
wt_tbl <- read_excel(file.path(base_dir, "K1_K2_Cows_All_Weights.xlsx"), sheet = "K1_K2_All_Weights") %>%
transmute(
cow_id = as.character(cow_id),
grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
weight_day0_kg = as.numeric(weight_day0_kg),
weight_mid_kg = as.numeric(weight_mid_kg),
weight_end_kg = as.numeric(weight_end_kg),
weight_mean_kg = rowMeans(cbind(weight_day0_kg, weight_mid_kg, weight_end_kg), na.rm = TRUE)
)
cow_cov <- age_tbl %>%
full_join(wt_tbl, by = c("cow_id","grazing_treatment"))
dat2 <- dat %>%
left_join(cow_cov, by = c("cow_id","grazing_treatment"))
# QA
qa <- tibble(
n_rows = nrow(dat2),
n_cows = n_distinct(dat2$cow_id),
missing_age_rows = sum(is.na(dat2$age_days)),
missing_weight_rows = sum(is.na(dat2$weight_mean_kg))
)
print(qa)
## # A tibble: 1 × 4
## n_rows n_cows missing_age_rows missing_weight_rows
## <int> <int> <int> <int>
## 1 2759 35 0 0
# -----------------------------
# 3) Fit baseline and adjusted AR1 CH4 models
# -----------------------------
m0 <- lme(
CH4_g_d ~ grazing_treatment * period_id,
random = ~1 | cow_id,
correlation = corAR1(form = ~ obs_index | cow_id),
data = dat2,
method = "REML",
na.action = na.omit
)
m1 <- lme(
CH4_g_d ~ grazing_treatment * period_id + scale(age_days) + scale(weight_mean_kg),
random = ~1 | cow_id,
correlation = corAR1(form = ~ obs_index | cow_id),
data = dat2,
method = "REML",
na.action = na.omit
)
# -----------------------------
# 4) Compare fixed effects and fit
# -----------------------------
aov_m0 <- as.data.frame(anova(m0)) %>% mutate(model = "baseline")
aov_m1 <- as.data.frame(anova(m1)) %>% mutate(model = "age_weight_adjusted")
aov_all <- bind_rows(aov_m0, aov_m1, .id = "term_id")
fit_tbl <- tibble(
model = c("baseline", "age_weight_adjusted"),
AIC = c(AIC(m0), AIC(m1)),
BIC = c(BIC(m0), BIC(m1)),
logLik = c(as.numeric(logLik(m0)), as.numeric(logLik(m1)))
)
print(fit_tbl)
## # A tibble: 2 × 4
## model AIC BIC logLik
## <chr> <dbl> <dbl> <dbl>
## 1 baseline 31960. 32037. -15967.
## 2 age_weight_adjusted 31943. 32032. -15957.
print(aov_m0)
## numDF denDF F-value p-value model
## (Intercept) 1 2716 2901.746247 0.0000000000 baseline
## grazing_treatment 1 33 5.296654 0.0278184960 baseline
## period_id 4 2716 37.050044 0.0000000000 baseline
## grazing_treatment:period_id 4 2716 5.323581 0.0002865162 baseline
print(aov_m1)
## numDF denDF F-value p-value
## (Intercept) 1 2716 3677.395522 0.0000000000
## grazing_treatment 1 31 6.771631 0.0140778639
## period_id 4 2716 37.115385 0.0000000000
## scale(age_days) 1 31 1.079869 0.3067645572
## scale(weight_mean_kg) 1 31 9.503630 0.0042826629
## grazing_treatment:period_id 4 2716 5.330185 0.0002831043
## model
## (Intercept) age_weight_adjusted
## grazing_treatment age_weight_adjusted
## period_id age_weight_adjusted
## scale(age_days) age_weight_adjusted
## scale(weight_mean_kg) age_weight_adjusted
## grazing_treatment:period_id age_weight_adjusted
# -----------------------------
# 5) Period-wise K2-K1 contrasts
# -----------------------------
ctr_m0 <- as.data.frame(
contrast(emmeans(m0, ~ grazing_treatment | period_id), method = "revpairwise", adjust = "tukey")
) %>% mutate(model = "baseline")
ctr_m1 <- as.data.frame(
contrast(emmeans(m1, ~ grazing_treatment | period_id), method = "revpairwise", adjust = "tukey")
) %>% mutate(model = "age_weight_adjusted")
ctr_all <- bind_rows(ctr_m0, ctr_m1)
print(ctr_all)
## contrast period_id estimate SE df t.ratio p.value
## 1 K2 - K1 1 49.096135 13.60864 33 3.6077180 0.001008843
## 2 K2 - K1 2 45.833447 18.53361 33 2.4729912 0.018721156
## 3 K2 - K1 3 18.795533 14.60170 33 1.2872151 0.206974854
## 4 K2 - K1 4 8.379711 13.18213 33 0.6356871 0.529364449
## 5 K2 - K1 5 34.849093 13.85241 33 2.5157424 0.016924394
## 6 K2 - K1 1 44.994990 12.60780 31 3.5688209 0.001190974
## 7 K2 - K1 2 40.900153 17.79443 31 2.2984803 0.028439434
## 8 K2 - K1 3 14.524860 13.67916 31 1.0618238 0.296518157
## 9 K2 - K1 4 4.401621 12.14113 31 0.3625380 0.719410076
## 10 K2 - K1 5 31.579527 12.88182 31 2.4514801 0.020062370
## model
## 1 baseline
## 2 baseline
## 3 baseline
## 4 baseline
## 5 baseline
## 6 age_weight_adjusted
## 7 age_weight_adjusted
## 8 age_weight_adjusted
## 9 age_weight_adjusted
## 10 age_weight_adjusted
# -----------------------------
# 6) Save outputs
# -----------------------------
write_csv(qa, file.path(work_dir, "age_weight_join_QA_15v4.csv"))
write_csv(fit_tbl, file.path(work_dir, "CH4_AR1_fit_comparison_baseline_vs_age_weight_15v4.csv"))
write_csv(aov_all, file.path(work_dir, "CH4_AR1_anova_baseline_vs_age_weight_15v4.csv"))
write_csv(ctr_all, file.path(work_dir, "CH4_AR1_contrasts_baseline_vs_age_weight_15v4.csv"))
wb <- createWorkbook()
addWorksheet(wb, "QA")
addWorksheet(wb, "Fit_Comparison")
addWorksheet(wb, "ANOVA")
addWorksheet(wb, "Contrasts")
writeData(wb, "QA", qa)
writeData(wb, "Fit_Comparison", fit_tbl)
writeData(wb, "ANOVA", aov_all)
writeData(wb, "Contrasts", ctr_all)
saveWorkbook(wb, file.path(work_dir, "CH4_AR1_age_weight_adjustment_15v4.xlsx"), overwrite = TRUE)
library(tibble)
library(openxlsx)
compact <- tribble(
~Metric, ~Baseline, ~AgeWeight_Adjusted,
"AIC", "31959.56", "31943.21",
"Treatment p-value", "0.0278", "0.0141",
"Period p-value", "<0.0001", "<0.0001",
"Treatment×Period p-value", "0.0003", "0.0003",
"Age p-value", "—", "0.3068",
"Weight p-value", "—", "0.0043",
"Significant K2-K1 periods", "3/5 (1,2,5)", "3/5 (1,2,5)"
)
print(compact)
## # A tibble: 7 × 3
## Metric Baseline AgeWeight_Adjusted
## <chr> <chr> <chr>
## 1 AIC 31959.56 31943.21
## 2 Treatment p-value 0.0278 0.0141
## 3 Period p-value <0.0001 <0.0001
## 4 Treatment×Period p-value 0.0003 0.0003
## 5 Age p-value — 0.3068
## 6 Weight p-value — 0.0043
## 7 Significant K2-K1 periods 3/5 (1,2,5) 3/5 (1,2,5)
wb <- createWorkbook()
addWorksheet(wb, "Compact_Result")
writeData(wb, "Compact_Result", compact)
saveWorkbook(wb, "CH4_age_weight_compact_summary_15v4.xlsx", overwrite = TRUE)
library(tibble)
library(openxlsx)
compact <- tribble(
~Metric, ~Baseline, ~AgeWeight_Adjusted, ~Interpretation,
"AIC", "31959.56", "31943.21", "Lower AIC after adjustment indicates better fit.",
"Treatment p-value", "0.0278", "0.0141", "Treatment effect remains significant after adjusting for age and weight.",
"Period p-value", "<0.0001", "<0.0001", "Strong period effect in both models.",
"Treatment×Period p-value", "0.0003", "0.0003", "Interaction is stable; treatment differences vary by period in both models.",
"Age p-value", "—", "0.3068", "Age is not a significant predictor in the adjusted model.",
"Weight p-value", "—", "0.0043", "Weight is a significant covariate.",
"Significant K2-K1 periods", "3/5 (1,2,5)", "3/5 (1,2,5)", "Period-level treatment contrast pattern is unchanged (robust)."
)
print(compact)
## # A tibble: 7 × 4
## Metric Baseline AgeWeight_Adjusted Interpretation
## <chr> <chr> <chr> <chr>
## 1 AIC 31959.56 31943.21 Lower AIC after adju…
## 2 Treatment p-value 0.0278 0.0141 Treatment effect rem…
## 3 Period p-value <0.0001 <0.0001 Strong period effect…
## 4 Treatment×Period p-value 0.0003 0.0003 Interaction is stabl…
## 5 Age p-value — 0.3068 Age is not a signifi…
## 6 Weight p-value — 0.0043 Weight is a signific…
## 7 Significant K2-K1 periods 3/5 (1,2,5) 3/5 (1,2,5) Period-level treatme…
wb <- createWorkbook()
addWorksheet(wb, "Compact_Result")
writeData(wb, "Compact_Result", compact)
saveWorkbook(wb, "CH4_age_weight_compact_summary_with_interpretation_15v4.xlsx", overwrite = TRUE)
# Checking grazing treatment actual contribution to variation
library(readr)
library(readxl)
library(dplyr)
library(nlme)
library(openxlsx)
# -----------------------------
# Paths
# -----------------------------
base_dir <- "/Users/dolapo/Library/CloudStorage/GoogleDrive-daadepoj@ualberta.ca/Other computers/Dell/Academics/Python/ALES_Analysis/ALES_codex"
work_dir <- file.path(base_dir, "Feb13_emission_analysis")
# -----------------------------
# Load analysis data (15v4)
# -----------------------------
vis <- read_csv(file.path(work_dir, "analysis_3SD_with_14day_periods_4hr_bins.csv"), show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
period_id = factor(as.integer(period_id)),
CH4_g_d = as.numeric(CH4_g_d)
)
qual <- read_csv(file.path(work_dir, "cow_period_retained_15v_4bins.csv"), show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
period_id = factor(as.integer(period_id))
) %>%
distinct(cow_id, grazing_treatment, period_id)
dat <- vis %>%
inner_join(qual, by = c("cow_id","grazing_treatment","period_id")) %>%
filter(is.finite(CH4_g_d)) %>%
group_by(cow_id) %>%
arrange(start_time, .by_group = TRUE) %>%
mutate(obs_index = row_number()) %>%
ungroup()
# -----------------------------
# Load age + weight covariates
# -----------------------------
age_tbl <- read_excel(file.path(base_dir, "K1_K2_Cows_With_Age.xlsx"), sheet = "K1_K2_Age_Data") %>%
transmute(
cow_id = as.character(cow_id),
grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
age_days = as.numeric(cow_age_in_days)
)
wt_tbl <- read_excel(file.path(base_dir, "K1_K2_Cows_All_Weights.xlsx"), sheet = "K1_K2_All_Weights") %>%
transmute(
cow_id = as.character(cow_id),
grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
weight_mean_kg = rowMeans(
cbind(as.numeric(weight_day0_kg), as.numeric(weight_mid_kg), as.numeric(weight_end_kg)),
na.rm = TRUE
)
)
dat2 <- dat %>%
left_join(age_tbl, by = c("cow_id","grazing_treatment")) %>%
left_join(wt_tbl, by = c("cow_id","grazing_treatment"))
# -----------------------------
# Helper functions
# -----------------------------
fit_ar1_ml <- function(fml, data) {
lme(
fixed = fml,
random = ~1 | cow_id,
correlation = corAR1(form = ~obs_index | cow_id),
data = data,
method = "ML",
na.action = na.omit
)
}
pseudo_r2 <- function(mod) {
y <- model.response(model.frame(formula(mod), data = getData(mod)))
e <- resid(mod, type = "response")
1 - var(e, na.rm = TRUE) / var(y, na.rm = TRUE)
}
# -----------------------------
# Fit models
# -----------------------------
m_full <- fit_ar1_ml(CH4_g_d ~ grazing_treatment * period_id + scale(age_days) + scale(weight_mean_kg), dat2)
m_no_trt <- fit_ar1_ml(CH4_g_d ~ period_id + scale(age_days) + scale(weight_mean_kg), dat2)
m_no_period <- fit_ar1_ml(CH4_g_d ~ grazing_treatment + scale(age_days) + scale(weight_mean_kg), dat2)
m_no_agewt <- fit_ar1_ml(CH4_g_d ~ grazing_treatment * period_id, dat2)
R_full <- pseudo_r2(m_full)
R_no_trt <- pseudo_r2(m_no_trt)
R_no_period <- pseudo_r2(m_no_period)
R_no_agewt <- pseudo_r2(m_no_agewt)
# Absolute contribution in explained variance
contrib_treatment <- R_full - R_no_trt
contrib_period <- R_full - R_no_period
contrib_ageweight <- R_full - R_no_agewt
# Percent of explained variance attributable to treatment
pct_treatment_of_explained <- 100 * contrib_treatment / R_full
pct_other_of_explained <- 100 - pct_treatment_of_explained
do
## function (.data, ...)
## {
## lifecycle::signal_stage("superseded", "do()")
## UseMethod("do")
## }
## <bytecode: 0x11a3303e0>
## <environment: namespace:dplyr>
result_tbl <- tibble(
metric = c(
"R2_full",
"R2_no_treatment",
"R2_no_period",
"R2_no_age_weight",
"treatment_abs_contrib",
"period_abs_contrib",
"age_weight_abs_contrib",
"treatment_pct_of_explained",
"other_pct_of_explained"
),
value = c(
R_full,
R_no_trt,
R_no_period,
R_no_agewt,
contrib_treatment,
contrib_period,
contrib_ageweight,
pct_treatment_of_explained,
pct_other_of_explained
)
)
print(result_tbl)
## # A tibble: 9 × 2
## metric value
## <chr> <dbl>
## 1 R2_full 0.230
## 2 R2_no_treatment 0.223
## 3 R2_no_period 0.170
## 4 R2_no_age_weight 0.230
## 5 treatment_abs_contrib 0.00703
## 6 period_abs_contrib 0.0591
## 7 age_weight_abs_contrib -0.000480
## 8 treatment_pct_of_explained 3.06
## 9 other_pct_of_explained 96.9
# Save
write_csv(result_tbl, file.path(work_dir, "CH4_contribution_treatment_vs_other_15v4.csv"))
wb <- createWorkbook()
addWorksheet(wb, "CH4_Contribution")
writeData(wb, "CH4_Contribution", result_tbl)
saveWorkbook(wb, file.path(work_dir, "CH4_contribution_treatment_vs_other_15v4.xlsx"), overwrite = TRUE)
# Check rotation frequency effect:
library(readr)
library(readxl)
library(dplyr)
library(nlme)
library(emmeans)
library(openxlsx)
base_dir <- "/Users/dolapo/Library/CloudStorage/GoogleDrive-daadepoj@ualberta.ca/Other computers/Dell/Academics/Python/ALES_Analysis/ALES_codex"
work_dir <- file.path(base_dir, "Feb13_emission_analysis")
excel_origin <- as.Date("1899-12-30")
dat_raw <- read_csv(file.path(work_dir, "analysis_3SD_enriched.csv"), show_col_types = FALSE)
dat <- dat_raw %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
CH4_g_d = as.numeric(CH4_g_d),
start_time = as.numeric(start_time),
visit_date = if ("visit_date" %in% names(dat_raw)) as.Date(visit_date) else (excel_origin + floor(start_time))
) %>%
filter(is.finite(CH4_g_d), !is.na(visit_date), !is.na(grazing_treatment)) %>%
mutate(
rotation_phase = case_when(
visit_date >= as.Date("2025-06-27") & visit_date <= as.Date("2025-07-30") ~ "Cycle1_2to4d",
visit_date >= as.Date("2025-07-31") & visit_date <= as.Date("2025-09-09") ~ "Cycle2_gt4d",
TRUE ~ NA_character_
),
rotation_phase = factor(rotation_phase, levels = c("Cycle1_2to4d", "Cycle2_gt4d")),
day_index = as.numeric(visit_date - min(visit_date, na.rm = TRUE))
) %>%
filter(!is.na(rotation_phase)) %>%
group_by(cow_id) %>%
arrange(start_time, .by_group = TRUE) %>%
mutate(obs_index = row_number()) %>%
ungroup()
age_tbl <- read_excel(file.path(base_dir, "K1_K2_Cows_With_Age.xlsx"), sheet = "K1_K2_Age_Data") %>%
transmute(cow_id = as.character(cow_id),
grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
age_days = as.numeric(cow_age_in_days))
wt_tbl <- read_excel(file.path(base_dir, "K1_K2_Cows_All_Weights.xlsx"), sheet = "K1_K2_All_Weights") %>%
transmute(cow_id = as.character(cow_id),
grazing_treatment = factor(toupper(as.character(grazing_treatment)), levels = c("K1","K2")),
weight_mean_kg = rowMeans(cbind(as.numeric(weight_day0_kg), as.numeric(weight_mid_kg), as.numeric(weight_end_kg)), na.rm = TRUE))
dat2 <- dat %>%
left_join(age_tbl, by = c("cow_id","grazing_treatment")) %>%
left_join(wt_tbl, by = c("cow_id","grazing_treatment"))
m_rot <- lme(
CH4_g_d ~ grazing_treatment * rotation_phase + scale(age_days) + scale(weight_mean_kg) + scale(day_index),
random = ~1 | cow_id,
correlation = corAR1(form = ~obs_index | cow_id),
data = dat2,
method = "REML",
na.action = na.omit
)
emm <- emmeans(m_rot, ~ grazing_treatment * rotation_phase)
did_df <- as.data.frame(contrast(emm, method = list("DiD_rotation_effect" = c(-1, 1, 1, -1))))
print(did_df)
## contrast estimate SE df t.ratio p.value
## DiD_rotation_effect 29.3701 6.379368 3320 4.604 <.0001
##
## Degrees-of-freedom method: containment
library(openxlsx)
wb <- createWorkbook()
addWorksheet(wb, "DiD_CH4")
writeData(wb, "DiD_CH4", did_df)
saveWorkbook(wb, "rotation_effect_DiD_CH4_after3SD.xlsx", overwrite = TRUE)
# Check visit distribution based on TOD
library(readr)
library(dplyr)
library(openxlsx)
library(ggplot2)
# -----------------------------
# Inputs
# -----------------------------
visits_file <- "analysis_3SD_with_14day_periods_4hr_bins.csv"
qual_file <- "cow_period_retained_15v_4bins.csv"
# -----------------------------
# Load TOD-qualified rows (15v4)
# -----------------------------
vis <- read_csv(visits_file, show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = toupper(as.character(grazing_treatment)),
period_id = as.integer(period_id),
visit_date = as.Date(visit_date),
time_bin_4hr = as.character(time_bin_4hr)
)
qual <- read_csv(qual_file, show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = toupper(as.character(grazing_treatment)),
period_id = as.integer(period_id)
) %>%
distinct(cow_id, grazing_treatment, period_id)
dat <- vis %>%
inner_join(qual, by = c("cow_id","grazing_treatment","period_id")) %>%
filter(!is.na(grazing_treatment))
# -----------------------------
# 1) Overall visit counts by treatment
# -----------------------------
visits_overall <- dat %>%
group_by(grazing_treatment) %>%
summarise(
total_visits = n(),
n_cows = n_distinct(cow_id),
n_periods = n_distinct(period_id),
.groups = "drop"
)
# -----------------------------
# 2) Visits by period x treatment
# -----------------------------
visits_by_period <- dat %>%
group_by(period_id, grazing_treatment) %>%
summarise(
total_visits = n(),
n_cows = n_distinct(cow_id),
visits_per_cow = total_visits / n_cows,
.groups = "drop"
) %>%
arrange(period_id, grazing_treatment)
# -----------------------------
# 3) Visits by 4h bin x treatment (TOD distribution)
# -----------------------------
visits_by_bin <- dat %>%
group_by(grazing_treatment, time_bin_4hr) %>%
summarise(total_visits = n(), .groups = "drop") %>%
group_by(grazing_treatment) %>%
mutate(
pct_within_treatment = 100 * total_visits / sum(total_visits)
) %>%
ungroup()
# -----------------------------
# 4) Daily visits by treatment
# -----------------------------
visits_by_day <- dat %>%
group_by(visit_date, grazing_treatment) %>%
summarise(n_visits = n(), .groups = "drop") %>%
arrange(visit_date, grazing_treatment)
visits_by_day_summary <- visits_by_day %>%
group_by(grazing_treatment) %>%
summarise(
n_days = n_distinct(visit_date),
mean_visits_per_day = mean(n_visits),
sd_visits_per_day = sd(n_visits),
min_visits_per_day = min(n_visits),
max_visits_per_day = max(n_visits),
.groups = "drop"
)
# -----------------------------
# 5) Plots
# -----------------------------
p_bin <- ggplot(visits_by_bin,
aes(x = time_bin_4hr, y = total_visits, fill = grazing_treatment)) +
geom_col(position = position_dodge(width = 0.8), width = 0.7) +
scale_fill_manual(values = c("K1" = "forestgreen", "K2" = "steelblue")) +
labs(title = "Visit Distribution by 4-hour TOD Bin",
x = "4-hour bin", y = "Number of visits", fill = "Treatment") +
theme_minimal(base_size = 13) +
theme(axis.text.x = element_text(angle = 25, hjust = 1))
ggsave("TOD_visit_distribution_by_bin_K1_K2_15v4.png", p_bin, width = 9, height = 5.5, dpi = 320)
p_period <- ggplot(visits_by_period,
aes(x = factor(period_id), y = total_visits, fill = grazing_treatment)) +
geom_col(position = position_dodge(width = 0.8), width = 0.7) +
scale_fill_manual(values = c("K1" = "forestgreen", "K2" = "steelblue")) +
labs(title = "Visit Distribution by 14-day Period",
x = "Period", y = "Number of visits", fill = "Treatment") +
theme_minimal(base_size = 13)
ggsave("TOD_visit_distribution_by_period_K1_K2_15v4.png", p_period, width = 8.5, height = 5.5, dpi = 320)
# -----------------------------
# 6) Save tables
# -----------------------------
write_csv(visits_overall, "TOD_visits_overall_by_treatment_15v4.csv")
write_csv(visits_by_period, "TOD_visits_by_period_treatment_15v4.csv")
write_csv(visits_by_bin, "TOD_visits_by_4hr_bin_treatment_15v4.csv")
write_csv(visits_by_day, "TOD_visits_by_day_treatment_15v4.csv")
write_csv(visits_by_day_summary, "TOD_visits_by_day_summary_treatment_15v4.csv")
wb <- createWorkbook()
addWorksheet(wb, "Overall")
addWorksheet(wb, "By_Period")
addWorksheet(wb, "By_4hr_Bin")
addWorksheet(wb, "By_Day")
addWorksheet(wb, "By_Day_Summary")
writeData(wb, "Overall", visits_overall)
writeData(wb, "By_Period", visits_by_period)
writeData(wb, "By_4hr_Bin", visits_by_bin)
writeData(wb, "By_Day", visits_by_day)
writeData(wb, "By_Day_Summary", visits_by_day_summary)
saveWorkbook(wb, "TOD_visit_distribution_K1_K2_15v4.xlsx", overwrite = TRUE)
print(visits_overall)
## # A tibble: 2 × 4
## grazing_treatment total_visits n_cows n_periods
## <chr> <int> <int> <int>
## 1 K1 1427 19 5
## 2 K2 1332 16 5
print(visits_by_period)
## # A tibble: 10 × 5
## period_id grazing_treatment total_visits n_cows visits_per_cow
## <int> <chr> <int> <int> <dbl>
## 1 1 K1 354 16 22.1
## 2 1 K2 309 11 28.1
## 3 2 K1 54 3 18
## 4 2 K2 184 10 18.4
## 5 3 K1 162 9 18
## 6 3 K2 306 14 21.9
## 7 4 K1 521 18 28.9
## 8 4 K2 298 15 19.9
## 9 5 K1 336 16 21
## 10 5 K2 235 12 19.6
print(visits_by_bin)
## # A tibble: 12 × 4
## grazing_treatment time_bin_4hr total_visits pct_within_treatment
## <chr> <chr> <int> <dbl>
## 1 K1 00:00-04:00 11 0.771
## 2 K1 04:00-08:00 362 25.4
## 3 K1 08:00-12:00 258 18.1
## 4 K1 12:00-16:00 292 20.5
## 5 K1 16:00-20:00 392 27.5
## 6 K1 20:00-24:00 112 7.85
## 7 K2 00:00-04:00 53 3.98
## 8 K2 04:00-08:00 202 15.2
## 9 K2 08:00-12:00 354 26.6
## 10 K2 12:00-16:00 321 24.1
## 11 K2 16:00-20:00 275 20.6
## 12 K2 20:00-24:00 127 9.53
print(visits_by_day_summary)
## # A tibble: 2 × 6
## grazing_treatment n_days mean_visits_per_day sd_visits_per_day
## <chr> <int> <dbl> <dbl>
## 1 K1 62 23.0 13.5
## 2 K2 70 19.0 9.28
## # ℹ 2 more variables: min_visits_per_day <int>, max_visits_per_day <int>
# Compare visit distribution
library(readr)
library(dplyr)
library(ggplot2)
# Input from your saved output
bin_df <- read_csv("TOD_visits_by_4hr_bin_treatment_15v4.csv", show_col_types = FALSE) %>%
mutate(
grazing_treatment = factor(grazing_treatment, levels = c("K1","K2")),
time_bin_4hr = factor(
time_bin_4hr,
levels = c("00:00-04:00","04:00-08:00","08:00-12:00",
"12:00-16:00","16:00-20:00","20:00-24:00")
)
)
# 1) Compare visit COUNTS by bin (K1 vs K2)
p_count <- ggplot(bin_df, aes(x = time_bin_4hr, y = total_visits, fill = grazing_treatment)) +
geom_col(position = position_dodge(width = 0.8), width = 0.7) +
scale_fill_manual(values = c("K1" = "forestgreen", "K2" = "steelblue")) +
labs(
title = "TOD Visit Count Distribution: K1 vs K2 (15v4)",
x = "4-hour time bin",
y = "Visit count",
fill = "Treatment"
) +
theme_minimal(base_size = 14) +
theme(
plot.title = element_text(face = "bold"),
axis.text.x = element_text(angle = 25, hjust = 1)
)
# 2) Compare visit PERCENT by bin (within each treatment)
p_pct <- ggplot(bin_df, aes(x = time_bin_4hr, y = pct_within_treatment, color = grazing_treatment, group = grazing_treatment)) +
geom_line(linewidth = 1.1) +
geom_point(size = 2.8) +
scale_color_manual(values = c("K1" = "forestgreen", "K2" = "steelblue")) +
labs(
title = "TOD Visit Percentage Distribution: K1 vs K2 (15v4)",
x = "4-hour time bin",
y = "Percent of visits within treatment (%)",
color = "Treatment"
) +
theme_minimal(base_size = 14) +
theme(
plot.title = element_text(face = "bold"),
axis.text.x = element_text(angle = 25, hjust = 1)
)
ggsave("TOD_compare_K1_K2_counts_15v4.png", p_count, width = 10, height = 6, dpi = 320)
ggsave("TOD_compare_K1_K2_percent_15v4.png", p_pct, width = 10, height = 6, dpi = 320)
print(p_count)

print(p_pct)

# Check visit distribution statistical significance
library(readr)
library(dplyr)
d <- read_csv("TOD_visits_by_4hr_bin_treatment_15v4.csv", show_col_types = FALSE)
tab <- xtabs(total_visits ~ grazing_treatment + time_bin_4hr, data = d)
print(tab)
## time_bin_4hr
## grazing_treatment 00:00-04:00 04:00-08:00 08:00-12:00 12:00-16:00 16:00-20:00
## K1 11 362 258 292 392
## K2 53 202 354 321 275
## time_bin_4hr
## grazing_treatment 20:00-24:00
## K1 112
## K2 127
chi <- chisq.test(tab)
print(chi)
##
## Pearson's Chi-squared test
##
## data: tab
## X-squared = 107.7, df = 5, p-value < 2.2e-16
cat("\nExpected counts:\n")
##
## Expected counts:
print(chi$expected)
## time_bin_4hr
## grazing_treatment 00:00-04:00 04:00-08:00 08:00-12:00 12:00-16:00 16:00-20:00
## K1 33.10185 291.71 316.5364 317.0536 344.9833
## K2 30.89815 272.29 295.4636 295.9464 322.0167
## time_bin_4hr
## grazing_treatment 20:00-24:00
## K1 123.6147
## K2 115.3853
# Optional effect size
n <- sum(tab)
r <- nrow(tab)
c <- ncol(tab)
cramers_v <- sqrt(as.numeric(chi$statistic) / (n * min(r - 1, c - 1)))
cat("\nCramer's V:", cramers_v, "\n")
##
## Cramer's V: 0.1975792
# If expected counts are small, run Fisher as sensitivity
if (any(chi$expected < 5)) {
cat("\nSome expected counts < 5; running Fisher's exact test (may be slow)...\n")
print(fisher.test(tab))
}
# Estimate CH4 and CO2 emissions based on TOD
library(readr)
library(dplyr)
library(ggplot2)
library(openxlsx)
# -----------------------------
# Inputs
# -----------------------------
visits_file <- "analysis_3SD_with_14day_periods_4hr_bins.csv"
qual_file <- "cow_period_retained_15v_4bins.csv"
# -----------------------------
# Load TOD-qualified rows (15v4)
# -----------------------------
vis <- read_csv(visits_file, show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = toupper(as.character(grazing_treatment)),
period_id = as.integer(period_id),
CH4_g_d = as.numeric(CH4_g_d),
CO2_g_d = as.numeric(CO2_g_d),
time_bin_4hr = as.character(time_bin_4hr)
)
qual <- read_csv(qual_file, show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = toupper(as.character(grazing_treatment)),
period_id = as.integer(period_id)
) %>%
distinct(cow_id, grazing_treatment, period_id)
dat <- vis %>%
inner_join(qual, by = c("cow_id", "grazing_treatment", "period_id")) %>%
filter(
!is.na(time_bin_4hr),
is.finite(CH4_g_d),
is.finite(CO2_g_d)
) %>%
mutate(
grazing_treatment = factor(grazing_treatment, levels = c("K1","K2")),
time_bin_4hr = factor(
time_bin_4hr,
levels = c("00:00-04:00","04:00-08:00","08:00-12:00",
"12:00-16:00","16:00-20:00","20:00-24:00")
)
)
# -----------------------------
# CH4 by bin x treatment
# -----------------------------
ch4_bin <- dat %>%
group_by(grazing_treatment, time_bin_4hr) %>%
summarise(
n_visits = n(),
CH4_mean = mean(CH4_g_d, na.rm = TRUE),
CH4_sd = sd(CH4_g_d, na.rm = TRUE),
CH4_se = CH4_sd / sqrt(n_visits),
CH4_lcl_95 = CH4_mean - 1.96 * CH4_se,
CH4_ucl_95 = CH4_mean + 1.96 * CH4_se,
.groups = "drop"
)
# -----------------------------
# CO2 by bin x treatment
# -----------------------------
co2_bin <- dat %>%
group_by(grazing_treatment, time_bin_4hr) %>%
summarise(
n_visits = n(),
CO2_mean = mean(CO2_g_d, na.rm = TRUE),
CO2_sd = sd(CO2_g_d, na.rm = TRUE),
CO2_se = CO2_sd / sqrt(n_visits),
CO2_lcl_95 = CO2_mean - 1.96 * CO2_se,
CO2_ucl_95 = CO2_mean + 1.96 * CO2_se,
.groups = "drop"
)
# -----------------------------
# Plot CH4
# -----------------------------
p_ch4 <- ggplot(ch4_bin, aes(x = time_bin_4hr, y = CH4_mean, color = grazing_treatment, group = grazing_treatment)) +
geom_line(linewidth = 1.1) +
geom_point(size = 2.8) +
geom_errorbar(aes(ymin = CH4_lcl_95, ymax = CH4_ucl_95), width = 0.15, linewidth = 0.7) +
scale_color_manual(values = c("K1" = "forestgreen", "K2" = "steelblue")) +
labs(
title = "CH4 Emission by 4-hour Time Bin (15v4)",
x = "4-hour time bin",
y = "Mean CH4 (g/day)",
color = "Treatment"
) +
theme_minimal(base_size = 14) +
theme(plot.title = element_text(face = "bold"),
axis.text.x = element_text(angle = 25, hjust = 1))
# -----------------------------
# Plot CO2
# -----------------------------
p_co2 <- ggplot(co2_bin, aes(x = time_bin_4hr, y = CO2_mean, color = grazing_treatment, group = grazing_treatment)) +
geom_line(linewidth = 1.1) +
geom_point(size = 2.8) +
geom_errorbar(aes(ymin = CO2_lcl_95, ymax = CO2_ucl_95), width = 0.15, linewidth = 0.7) +
scale_color_manual(values = c("K1" = "forestgreen", "K2" = "steelblue")) +
labs(
title = "CO2 Emission by 4-hour Time Bin (15v4)",
x = "4-hour time bin",
y = "Mean CO2 (g/day)",
color = "Treatment"
) +
theme_minimal(base_size = 14) +
theme(plot.title = element_text(face = "bold"),
axis.text.x = element_text(angle = 25, hjust = 1))
ggsave("CH4_by_timebin_K1_K2_15v4.png", p_ch4, width = 10, height = 6, dpi = 320)
ggsave("CO2_by_timebin_K1_K2_15v4.png", p_co2, width = 10, height = 6, dpi = 320)
print(p_ch4)

print(p_co2)

# -----------------------------
# Save tables
# -----------------------------
write_csv(ch4_bin, "CH4_by_timebin_treatment_15v4.csv")
write_csv(co2_bin, "CO2_by_timebin_treatment_15v4.csv")
wb <- createWorkbook()
addWorksheet(wb, "CH4_by_bin")
addWorksheet(wb, "CO2_by_bin")
writeData(wb, "CH4_by_bin", ch4_bin)
writeData(wb, "CO2_by_bin", co2_bin)
saveWorkbook(wb, "Emissions_by_timebin_K1_K2_15v4.xlsx", overwrite = TRUE)
# Check the reliabinlity of each time bin:
library(readr)
library(dplyr)
library(openxlsx)
# Inputs
visits_file <- "analysis_3SD_with_14day_periods_4hr_bins.csv"
qual_file <- "cow_period_retained_15v_4bins.csv"
# Load TOD-qualified rows
vis <- read_csv(visits_file, show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = toupper(as.character(grazing_treatment)),
period_id = as.integer(period_id),
CH4_g_d = as.numeric(CH4_g_d),
time_bin_4hr = as.character(time_bin_4hr)
) %>%
filter(is.finite(CH4_g_d), !is.na(time_bin_4hr))
qual <- read_csv(qual_file, show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = toupper(as.character(grazing_treatment)),
period_id = as.integer(period_id)
) %>%
distinct(cow_id, grazing_treatment, period_id)
dat <- vis %>%
inner_join(qual, by = c("cow_id","grazing_treatment","period_id"))
# Total cows per treatment in analysis
cow_totals <- dat %>%
group_by(grazing_treatment) %>%
summarise(total_cows = n_distinct(cow_id), .groups = "drop")
# Reliability by treatment x time bin
reliability <- dat %>%
group_by(grazing_treatment, time_bin_4hr) %>%
summarise(
n_visits = n(),
n_cows_contributing = n_distinct(cow_id),
mean_CH4 = mean(CH4_g_d, na.rm = TRUE),
sd_CH4 = sd(CH4_g_d, na.rm = TRUE),
se_CH4 = sd_CH4 / sqrt(n_visits),
cv_percent = 100 * sd_CH4 / mean_CH4,
.groups = "drop"
) %>%
left_join(cow_totals, by = "grazing_treatment") %>%
mutate(
cow_coverage_pct = 100 * n_cows_contributing / total_cows,
reliability_flag = case_when(
n_cows_contributing < 8 ~ "Low reliability",
cow_coverage_pct < 50 ~ "Low reliability",
n_visits < 30 ~ "Caution",
TRUE ~ "Adequate"
)
) %>%
arrange(grazing_treatment, time_bin_4hr)
print(reliability)
## # A tibble: 12 × 11
## grazing_treatment time_bin_4hr n_visits n_cows_contributing mean_CH4 sd_CH4
## <chr> <chr> <int> <int> <dbl> <dbl>
## 1 K1 00:00-04:00 11 9 329. 79.3
## 2 K1 04:00-08:00 362 19 317. 81.5
## 3 K1 08:00-12:00 258 19 310. 86.6
## 4 K1 12:00-16:00 292 19 293. 96.8
## 5 K1 16:00-20:00 392 19 312. 85.8
## 6 K1 20:00-24:00 112 19 330. 82.8
## 7 K2 00:00-04:00 53 12 408. 82.8
## 8 K2 04:00-08:00 202 16 325. 84.9
## 9 K2 08:00-12:00 354 16 330. 88.4
## 10 K2 12:00-16:00 321 16 341. 88.5
## 11 K2 16:00-20:00 275 16 348. 85.9
## 12 K2 20:00-24:00 127 15 360. 88.6
## # ℹ 5 more variables: se_CH4 <dbl>, cv_percent <dbl>, total_cows <int>,
## # cow_coverage_pct <dbl>, reliability_flag <chr>
# Optional: midnight-specific check
midnight_check <- reliability %>% filter(time_bin_4hr == "00:00-04:00")
cat("\n=== Midnight bin reliability ===\n")
##
## === Midnight bin reliability ===
print(midnight_check)
## # A tibble: 2 × 11
## grazing_treatment time_bin_4hr n_visits n_cows_contributing mean_CH4 sd_CH4
## <chr> <chr> <int> <int> <dbl> <dbl>
## 1 K1 00:00-04:00 11 9 329. 79.3
## 2 K2 00:00-04:00 53 12 408. 82.8
## # ℹ 5 more variables: se_CH4 <dbl>, cv_percent <dbl>, total_cows <int>,
## # cow_coverage_pct <dbl>, reliability_flag <chr>
write_csv(reliability, "CH4_timebin_reliability_15v4.csv")
write_csv(midnight_check, "CH4_midnight_reliability_15v4.csv")
wb <- createWorkbook()
addWorksheet(wb, "Reliability_All_Bins")
addWorksheet(wb, "Midnight_Only")
writeData(wb, "Reliability_All_Bins", reliability)
writeData(wb, "Midnight_Only", midnight_check)
saveWorkbook(wb, "CH4_timebin_reliability_15v4.xlsx", overwrite = TRUE)
#Filter for minimum of 2 visits per cow
library(readr)
library(dplyr)
library(openxlsx)
# Inputs
visits_file <- "analysis_3SD_with_14day_periods_4hr_bins.csv"
qual_file <- "cow_period_retained_15v_4bins.csv"
# Load TOD-qualified rows
vis <- read_csv(visits_file, show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = toupper(as.character(grazing_treatment)),
period_id = as.integer(period_id),
CH4_g_d = as.numeric(CH4_g_d),
time_bin_4hr = as.character(time_bin_4hr)
) %>%
filter(is.finite(CH4_g_d), !is.na(time_bin_4hr))
qual <- read_csv(qual_file, show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = toupper(as.character(grazing_treatment)),
period_id = as.integer(period_id)
) %>%
distinct(cow_id, grazing_treatment, period_id)
dat <- vis %>%
inner_join(qual, by = c("cow_id", "grazing_treatment", "period_id"))
# -----------------------------
# Require >=2 visits per cow within each time bin
# -----------------------------
cow_bin_counts <- dat %>%
group_by(cow_id, grazing_treatment, time_bin_4hr) %>%
summarise(n_visits_cow_bin = n(), .groups = "drop")
eligible_cow_bin <- cow_bin_counts %>%
filter(n_visits_cow_bin >= 2)
dat_reliable <- dat %>%
inner_join(
eligible_cow_bin %>% select(cow_id, grazing_treatment, time_bin_4hr),
by = c("cow_id", "grazing_treatment", "time_bin_4hr")
)
# Coverage summary
coverage <- cow_bin_counts %>%
group_by(grazing_treatment, time_bin_4hr) %>%
summarise(
n_cows_all = n_distinct(cow_id),
n_cows_ge2 = n_distinct(cow_id[n_visits_cow_bin >= 2]),
pct_cows_ge2 = 100 * n_cows_ge2 / n_cows_all,
.groups = "drop"
) %>%
arrange(grazing_treatment, time_bin_4hr)
# CH4 mean by bin after reliability filter
ch4_bin_reliable <- dat_reliable %>%
group_by(grazing_treatment, time_bin_4hr) %>%
summarise(
n_visits = n(),
n_cows_contributing = n_distinct(cow_id),
CH4_mean = mean(CH4_g_d, na.rm = TRUE),
CH4_sd = sd(CH4_g_d, na.rm = TRUE),
CH4_se = CH4_sd / sqrt(n_visits),
CH4_lcl_95 = CH4_mean - 1.96 * CH4_se,
CH4_ucl_95 = CH4_mean + 1.96 * CH4_se,
.groups = "drop"
) %>%
arrange(grazing_treatment, time_bin_4hr)
# Print key outputs
cat("Rows before filter:", nrow(dat), "\n")
## Rows before filter: 2759
cat("Rows after >=2 visits/cow/bin filter:", nrow(dat_reliable), "\n\n")
## Rows after >=2 visits/cow/bin filter: 2747
cat("=== Cow coverage by bin (>=2 visits rule) ===\n")
## === Cow coverage by bin (>=2 visits rule) ===
print(coverage)
## # A tibble: 12 × 5
## grazing_treatment time_bin_4hr n_cows_all n_cows_ge2 pct_cows_ge2
## <chr> <chr> <int> <int> <dbl>
## 1 K1 00:00-04:00 9 2 22.2
## 2 K1 04:00-08:00 19 19 100
## 3 K1 08:00-12:00 19 19 100
## 4 K1 12:00-16:00 19 19 100
## 5 K1 16:00-20:00 19 19 100
## 6 K1 20:00-24:00 19 17 89.5
## 7 K2 00:00-04:00 12 9 75
## 8 K2 04:00-08:00 16 16 100
## 9 K2 08:00-12:00 16 16 100
## 10 K2 12:00-16:00 16 16 100
## 11 K2 16:00-20:00 16 16 100
## 12 K2 20:00-24:00 15 15 100
cat("\n=== CH4 by bin after >=2 visits rule ===\n")
##
## === CH4 by bin after >=2 visits rule ===
print(ch4_bin_reliable)
## # A tibble: 12 × 9
## grazing_treatment time_bin_4hr n_visits n_cows_contributing CH4_mean CH4_sd
## <chr> <chr> <int> <int> <dbl> <dbl>
## 1 K1 00:00-04:00 4 2 357. 97.5
## 2 K1 04:00-08:00 362 19 317. 81.5
## 3 K1 08:00-12:00 258 19 310. 86.6
## 4 K1 12:00-16:00 292 19 293. 96.8
## 5 K1 16:00-20:00 392 19 312. 85.8
## 6 K1 20:00-24:00 110 17 331. 83.2
## 7 K2 00:00-04:00 50 9 408. 84.0
## 8 K2 04:00-08:00 202 16 325. 84.9
## 9 K2 08:00-12:00 354 16 330. 88.4
## 10 K2 12:00-16:00 321 16 341. 88.5
## 11 K2 16:00-20:00 275 16 348. 85.9
## 12 K2 20:00-24:00 127 15 360. 88.6
## # ℹ 3 more variables: CH4_se <dbl>, CH4_lcl_95 <dbl>, CH4_ucl_95 <dbl>
# Save
write_csv(coverage, "CH4_timebin_cow_coverage_ge2_15v4.csv")
write_csv(ch4_bin_reliable, "CH4_by_timebin_reliable_ge2visits_15v4.csv")
wb <- createWorkbook()
addWorksheet(wb, "Coverage_ge2")
addWorksheet(wb, "CH4_by_bin_ge2")
writeData(wb, "Coverage_ge2", coverage)
writeData(wb, "CH4_by_bin_ge2", ch4_bin_reliable)
saveWorkbook(wb, "CH4_timebin_reliable_ge2visits_15v4.xlsx", overwrite = TRUE)
library(readr)
library(dplyr)
x <- read_csv("CH4_by_timebin_reliable_ge2visits_15v4.csv", show_col_types = FALSE)
x2 <- x %>%
mutate(
CH4_CI95 = paste0("(", round(CH4_lcl_95, 1), ", ", round(CH4_ucl_95, 1), ")"),
CI_width = CH4_ucl_95 - CH4_lcl_95,
verdict = case_when(
n_cows_contributing < 5 | n_visits < 20 ~ "Low reliability",
n_cows_contributing < 10 | n_visits < 50 ~ "Moderate reliability",
TRUE ~ "Adequate reliability"
)
) %>%
arrange(grazing_treatment, time_bin_4hr)
print(x2)
## # A tibble: 12 × 12
## grazing_treatment time_bin_4hr n_visits n_cows_contributing CH4_mean CH4_sd
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 K1 00:00-04:00 4 2 357. 97.5
## 2 K1 04:00-08:00 362 19 317. 81.5
## 3 K1 08:00-12:00 258 19 310. 86.6
## 4 K1 12:00-16:00 292 19 293. 96.8
## 5 K1 16:00-20:00 392 19 312. 85.8
## 6 K1 20:00-24:00 110 17 331. 83.2
## 7 K2 00:00-04:00 50 9 408. 84.0
## 8 K2 04:00-08:00 202 16 325. 84.9
## 9 K2 08:00-12:00 354 16 330. 88.4
## 10 K2 12:00-16:00 321 16 341. 88.5
## 11 K2 16:00-20:00 275 16 348. 85.9
## 12 K2 20:00-24:00 127 15 360. 88.6
## # ℹ 6 more variables: CH4_se <dbl>, CH4_lcl_95 <dbl>, CH4_ucl_95 <dbl>,
## # CH4_CI95 <chr>, CI_width <dbl>, verdict <chr>
write_csv(x2, "CH4_timebin_reliable_ge2visits_15v4_with_CI_verdict.csv")
#Emissions and visit distribution at herd level over 24 hours
library(readr)
library(dplyr)
library(ggplot2)
library(openxlsx)
# -----------------------------
# Inputs
# -----------------------------
visits_file <- "analysis_3SD_with_14day_periods_4hr_bins.csv"
qual_file <- "cow_period_retained_15v_4bins.csv"
# -----------------------------
# Load TOD-qualified rows (15v4)
# -----------------------------
vis <- read_csv(visits_file, show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = toupper(as.character(grazing_treatment)),
period_id = as.integer(period_id),
CH4_g_d = as.numeric(CH4_g_d),
hour_of_the_day = as.numeric(hour_of_the_day)
) %>%
filter(is.finite(CH4_g_d), is.finite(hour_of_the_day))
qual <- read_csv(qual_file, show_col_types = FALSE) %>%
mutate(
cow_id = as.character(cow_id),
grazing_treatment = toupper(as.character(grazing_treatment)),
period_id = as.integer(period_id)
) %>%
distinct(cow_id, grazing_treatment, period_id)
dat <- vis %>%
inner_join(qual, by = c("cow_id", "grazing_treatment", "period_id")) %>%
mutate(
grazing_treatment = factor(grazing_treatment, levels = c("K1","K2")),
hour_24 = as.integer(floor(hour_of_the_day))
) %>%
filter(hour_24 >= 0, hour_24 <= 23)
# -----------------------------
# Visit distribution by hour (0-23)
# -----------------------------
visits_hour <- dat %>%
group_by(grazing_treatment, hour_24) %>%
summarise(
n_visits = n(),
n_cows = n_distinct(cow_id),
.groups = "drop"
) %>%
complete(grazing_treatment, hour_24 = 0:23, fill = list(n_visits = 0, n_cows = 0)) %>%
group_by(grazing_treatment) %>%
mutate(pct_visits = 100 * n_visits / sum(n_visits)) %>%
ungroup()
# -----------------------------
# CH4 by hour (0-23)
# -----------------------------
ch4_hour <- dat %>%
group_by(grazing_treatment, hour_24) %>%
summarise(
n_visits = n(),
n_cows = n_distinct(cow_id),
CH4_mean = mean(CH4_g_d, na.rm = TRUE),
CH4_sd = sd(CH4_g_d, na.rm = TRUE),
CH4_se = CH4_sd / sqrt(n_visits),
CH4_lcl_95 = CH4_mean - 1.96 * CH4_se,
CH4_ucl_95 = CH4_mean + 1.96 * CH4_se,
.groups = "drop"
) %>%
complete(grazing_treatment, hour_24 = 0:23)
# -----------------------------
# Plot 1: Visit distribution by hour
# -----------------------------
p_visits <- ggplot(visits_hour, aes(x = hour_24, y = n_visits, color = grazing_treatment)) +
geom_line(linewidth = 1.05) +
geom_point(size = 2) +
scale_color_manual(values = c("K1" = "forestgreen", "K2" = "steelblue")) +
scale_x_continuous(breaks = 0:23) +
labs(
title = "Visit Distribution Over 24 Hours (15v4)",
x = "Hour of day (0-23)",
y = "Visit count",
color = "Treatment"
) +
theme_minimal(base_size = 13) +
theme(plot.title = element_text(face = "bold"))
# -----------------------------
# Plot 2: CH4 over 24 hours
# -----------------------------
p_ch4 <- ggplot(ch4_hour, aes(x = hour_24, y = CH4_mean, color = grazing_treatment, group = grazing_treatment)) +
geom_line(linewidth = 1.05, na.rm = TRUE) +
geom_point(size = 2, na.rm = TRUE) +
geom_errorbar(aes(ymin = CH4_lcl_95, ymax = CH4_ucl_95), width = 0.2, linewidth = 0.6, na.rm = TRUE) +
scale_color_manual(values = c("K1" = "forestgreen", "K2" = "steelblue")) +
scale_x_continuous(breaks = 0:23) +
labs(
title = "CH4 Emissions Over 24 Hours (15v4)",
x = "Hour of day (0-23)",
y = "Mean CH4 (g/day)",
color = "Treatment"
) +
theme_minimal(base_size = 13) +
theme(plot.title = element_text(face = "bold"))
ggsave("TOD_visits_by_hour_0_23_15v4.png", p_visits, width = 12, height = 5.5, dpi = 320)
ggsave("TOD_CH4_by_hour_0_23_15v4.png", p_ch4, width = 12, height = 5.5, dpi = 320)
print(p_visits)

print(p_ch4)

# -----------------------------
# Save tables
# -----------------------------
write_csv(visits_hour, "TOD_visits_by_hour_0_23_15v4.csv")
write_csv(ch4_hour, "TOD_CH4_by_hour_0_23_15v4.csv")
wb <- createWorkbook()
addWorksheet(wb, "Visits_by_hour")
addWorksheet(wb, "CH4_by_hour")
writeData(wb, "Visits_by_hour", visits_hour)
writeData(wb, "CH4_by_hour", ch4_hour)
saveWorkbook(wb, "TOD_CH4_and_visits_by_hour_0_23_15v4.xlsx", overwrite = TRUE)