1 1. Data Loading and Preparation

1.1 1.1 Load Raw Data

# Load the 3SD filtered dataset
raw_data <- read_excel("gf_full_dataset_3SD_filtered.xlsx")

cat("Dataset loaded successfully!\n")
## Dataset loaded successfully!
cat("Dimensions:", nrow(raw_data), "observations x", ncol(raw_data), "variables\n\n")
## Dimensions: 3355 observations x 21 variables
cat("First few rows:\n")
## First few rows:
print(head(raw_data))
## # A tibble: 6 × 21
##   cow_id grazing_treatment start_time          end_time           
##   <chr>  <chr>             <dttm>              <dttm>             
## 1 252H   K2_CON            2025-06-27 14:55:30 2025-06-27 14:58:38
## 2 252H   K2_CON            2025-06-27 19:47:12 2025-06-27 19:52:15
## 3 252H   K2_CON            2025-06-27 22:03:13 2025-06-27 22:05:37
## 4 252H   K2_CON            2025-06-28 00:49:31 2025-06-28 00:54:25
## 5 252H   K2_CON            2025-06-28 19:20:02 2025-06-28 19:26:41
## 6 252H   K2_CON            2025-06-29 01:07:48 2025-06-29 01:10:02
## # ℹ 17 more variables: good_data_duration <dttm>, hour_of_day <dbl>,
## #   co2_massflow_g_d <dbl>, ch4_massflow_g_d <dbl>, o2_massflow_g_d <dbl>,
## #   average_airflow_l_s <dbl>, airflow_cf <dbl>, wind_cf <dbl>, Time_Bin <chr>,
## #   Week_Number <dbl>, Filtered_Status <chr>, row_id <dbl>, GF_Week <chr>,
## #   Phase <chr>, Window_14d <chr>, Visit_Date <dttm>, Cycle <chr>
# Check data structure
cat("\nData structure:\n")
## 
## Data structure:
str(raw_data)
## tibble [3,355 × 21] (S3: tbl_df/tbl/data.frame)
##  $ cow_id             : chr [1:3355] "252H" "252H" "252H" "252H" ...
##  $ grazing_treatment  : chr [1:3355] "K2_CON" "K2_CON" "K2_CON" "K2_CON" ...
##  $ start_time         : POSIXct[1:3355], format: "2025-06-27 14:55:30" "2025-06-27 19:47:12" ...
##  $ end_time           : POSIXct[1:3355], format: "2025-06-27 14:58:38" "2025-06-27 19:52:15" ...
##  $ good_data_duration : POSIXct[1:3355], format: "1900-01-01 00:03:04" "1900-01-01 00:04:59" ...
##  $ hour_of_day        : num [1:3355] 14 19 22 0 19 1 7 13 20 1 ...
##  $ co2_massflow_g_d   : num [1:3355] 8856 8851 9855 9809 12196 ...
##  $ ch4_massflow_g_d   : num [1:3355] 367 360 321 401 396 ...
##  $ o2_massflow_g_d    : num [1:3355] 0 0 0 0 0 0 0 0 0 0 ...
##  $ average_airflow_l_s: num [1:3355] 39.3 40.3 38.6 39.8 43.4 ...
##  $ airflow_cf         : num [1:3355] 1 1 1 1 1 1 1 1 1 1 ...
##  $ wind_cf            : num [1:3355] 1.16 1.13 1.11 1.02 1.11 ...
##  $ Time_Bin           : chr [1:3355] "Bin_4" "Bin_5" "Bin_6" "Bin_1" ...
##  $ Week_Number        : num [1:3355] 1 1 1 1 1 1 1 1 1 1 ...
##  $ Filtered_Status    : chr [1:3355] "Kept" "Kept" "Kept" "Kept" ...
##  $ row_id             : num [1:3355] 1 2 3 4 5 6 7 8 9 10 ...
##  $ GF_Week            : chr [1:3355] "Week_1" "Week_1" "Week_1" "Week_1" ...
##  $ Phase              : chr [1:3355] "Phase_1" "Phase_1" "Phase_1" "Phase_1" ...
##  $ Window_14d         : chr [1:3355] "W1_2" "W1_2" "W1_2" "W1_2" ...
##  $ Visit_Date         : POSIXct[1:3355], format: "2025-06-27" "2025-06-27" ...
##  $ Cycle              : chr [1:3355] "Cycle 1 (Rapid growth)" "Cycle 1 (Rapid growth)" "Cycle 1 (Rapid growth)" "Cycle 1 (Rapid growth)" ...
cat("\nVariable names:\n")
## 
## Variable names:
names(raw_data)
##  [1] "cow_id"              "grazing_treatment"   "start_time"         
##  [4] "end_time"            "good_data_duration"  "hour_of_day"        
##  [7] "co2_massflow_g_d"    "ch4_massflow_g_d"    "o2_massflow_g_d"    
## [10] "average_airflow_l_s" "airflow_cf"          "wind_cf"            
## [13] "Time_Bin"            "Week_Number"         "Filtered_Status"    
## [16] "row_id"              "GF_Week"             "Phase"              
## [19] "Window_14d"          "Visit_Date"          "Cycle"

1.2 1.2 Initial Data Exploration

cat("Unique cows:", n_distinct(raw_data$cow_id), "\n")
## Unique cows: 37
cat("Unique treatments:", n_distinct(raw_data$grazing_treatment), "\n")
## Unique treatments: 2
cat("Date range:", min(raw_data$date), "to", max(raw_data$date), "\n\n")
## Date range: Inf to -Inf
# Summary of CH4 measurements
cat("CH4 Summary Statistics:\n")
## CH4 Summary Statistics:
summary(raw_data$ch4_massflow_g_d)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   47.15  258.52  319.44  320.95  379.70  591.19
# Check for missing values
cat("\nMissing values:\n")
## 
## Missing values:
missing_vals <- colSums(is.na(raw_data))
if(any(missing_vals > 0)) print(missing_vals[missing_vals > 0]) else cat("No missing values found\n")
## No missing values found

1.3 1.3 Filter for ≥ 20 Observations per Cow

# Count observations per cow
obs_per_cow <- raw_data %>%
  group_by(cow_id) %>%
  summarise(n_obs = n(), .groups = 'drop')

cat("Observations per cow:\n")
## Observations per cow:
cat("Min:", min(obs_per_cow$n_obs), "\n")
## Min: 16
cat("Q1:", quantile(obs_per_cow$n_obs, 0.25), "\n")
## Q1: 79
cat("Median:", median(obs_per_cow$n_obs), "\n")
## Median: 94
cat("Mean:", round(mean(obs_per_cow$n_obs), 2), "\n")
## Mean: 90.68
cat("Q3:", quantile(obs_per_cow$n_obs, 0.75), "\n")
## Q3: 109
cat("Max:", max(obs_per_cow$n_obs), "\n\n")
## Max: 146
# Keep only cows with >= 20 observations
cows_to_keep <- obs_per_cow %>%
  filter(n_obs >= 20) %>%
  pull(cow_id)

data_filtered <- raw_data %>%
  filter(cow_id %in% cows_to_keep)

cat("=== FILTERING RESULTS ===\n")
## === FILTERING RESULTS ===
cat("Before filtering:\n")
## Before filtering:
cat("  Observations:", nrow(raw_data), "\n")
##   Observations: 3355
cat("  Cows:", n_distinct(raw_data$cow_id), "\n\n")
##   Cows: 37
cat("After filtering (≥20 observations):\n")
## After filtering (≥20 observations):
cat("  Observations:", nrow(data_filtered), "\n")
##   Observations: 3306
cat("  Cows:", n_distinct(data_filtered$cow_id), "\n\n")
##   Cows: 34
cat("Removed:\n")
## Removed:
cat("  Observations:", nrow(raw_data) - nrow(data_filtered), 
    "({", round(((nrow(raw_data) - nrow(data_filtered))/nrow(raw_data))*100, 2), "}%)\n")
##   Observations: 49 ({ 1.46 }%)
cat("  Cows:", n_distinct(raw_data$cow_id) - n_distinct(data_filtered$cow_id), "\n")
##   Cows: 3

1.4 1.4 Prepare Data for Mixed Model

# Check what columns are available
cat("Available columns:\n")
## Available columns:
print(names(data_filtered))
##  [1] "cow_id"              "grazing_treatment"   "start_time"         
##  [4] "end_time"            "good_data_duration"  "hour_of_day"        
##  [7] "co2_massflow_g_d"    "ch4_massflow_g_d"    "o2_massflow_g_d"    
## [10] "average_airflow_l_s" "airflow_cf"          "wind_cf"            
## [13] "Time_Bin"            "Week_Number"         "Filtered_Status"    
## [16] "row_id"              "GF_Week"             "Phase"              
## [19] "Window_14d"          "Visit_Date"          "Cycle"
cat("\n")
# Check what columns are available
cat("Available columns:\n")
## Available columns:
print(names(data_filtered))
##  [1] "cow_id"              "grazing_treatment"   "start_time"         
##  [4] "end_time"            "good_data_duration"  "hour_of_day"        
##  [7] "co2_massflow_g_d"    "ch4_massflow_g_d"    "o2_massflow_g_d"    
## [10] "average_airflow_l_s" "airflow_cf"          "wind_cf"            
## [13] "Time_Bin"            "Week_Number"         "Filtered_Status"    
## [16] "row_id"              "GF_Week"             "Phase"              
## [19] "Window_14d"          "Visit_Date"          "Cycle"
cat("\n")
# Prepare analysis dataset - be flexible with column selection
analysis_data <- data_filtered %>%
  mutate(
    # Ensure proper data types
    cow_id = as.factor(cow_id),
    grazing_treatment = as.factor(grazing_treatment),
    # Use average_airflow_l_s if available
    airflow = ifelse("average_airflow_l_s" %in% names(.), average_airflow_l_s, NA)
  ) %>%
  # Remove rows with missing values in key variables
  filter(!is.na(ch4_massflow_g_d))

# Check what optional columns exist and filter if they do
if ("average_airflow_l_s" %in% names(analysis_data)) {
  analysis_data <- analysis_data %>% filter(!is.na(average_airflow_l_s))
  cat("✓ Airflow column found (average_airflow_l_s) and filtered\n")
} else if ("airflow" %in% names(analysis_data)) {
  analysis_data <- analysis_data %>% filter(!is.na(airflow))
  cat("✓ Airflow column found and filtered\n")
} else {
  cat("⚠ Airflow column not found\n")
}
## ✓ Airflow column found (average_airflow_l_s) and filtered
cat("Analysis dataset prepared:\n")
## Analysis dataset prepared:
cat("  Observations:", nrow(analysis_data), "\n")
##   Observations: 3306
cat("  Cows:", n_distinct(analysis_data$cow_id), "\n")
##   Cows: 34
cat("  Variables:", ncol(analysis_data), "\n\n")
##   Variables: 22
cat("Summary of key variables:\n")
## Summary of key variables:
cat("CH4 Emissions (g/d):\n")
## CH4 Emissions (g/d):
cat("  Min:", min(analysis_data$ch4_massflow_g_d, na.rm=TRUE), "\n")
##   Min: 47.14538
cat("  Mean:", round(mean(analysis_data$ch4_massflow_g_d, na.rm=TRUE), 2), "\n")
##   Mean: 321.58
cat("  Max:", max(analysis_data$ch4_massflow_g_d, na.rm=TRUE), "\n\n")
##   Max: 591.1865
if ("average_airflow_l_s" %in% names(analysis_data)) {
  cat("Airflow (L/s):\n")
  cat("  Min:", min(analysis_data$average_airflow_l_s, na.rm=TRUE), "\n")
  cat("  Mean:", round(mean(analysis_data$average_airflow_l_s, na.rm=TRUE), 2), "\n")
  cat("  Max:", max(analysis_data$average_airflow_l_s, na.rm=TRUE), "\n\n")
}
## Airflow (L/s):
##   Min: 38.29936 
##   Mean: 42.44 
##   Max: 45.09738
if ("hour_of_day" %in% names(analysis_data)) {
  cat("Hour of Day:\n")
  cat("  Unique hours:", n_distinct(analysis_data$hour_of_day), "\n")
  cat("  Hours present:", paste(sort(unique(analysis_data$hour_of_day)), collapse=", "), "\n")
}
## Hour of Day:
##   Unique hours: 24 
##   Hours present: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23

2 2. Mixed Model Fitting

2.1 2.1 Fit Mixed Model

# Model specification:
# CH4 ~ Animal ID (fixed) + Airflow + Visit_Duration (covariates)
#       + (1|Date) + (1|Animal_ID:Hour_of_Day)

cat("Fitting mixed model...\n")
## Fitting mixed model...
cat("Testing covariates for significance...\n\n")
## Testing covariates for significance...
if ("average_airflow_l_s" %in% names(analysis_data)) {
  model_airflow <- lmer(ch4_massflow_g_d ~ average_airflow_l_s + (1|cow_id), 
                         data = analysis_data, REML = TRUE)
  airflow_pval <- summary(model_airflow)$coefficients["average_airflow_l_s", "Pr(>|t|)"]
  cat("Airflow p-value:", airflow_pval, "\n")
  include_airflow <- !is.na(airflow_pval) && airflow_pval < 0.10
} else {
  cat("Airflow not available\n")
  include_airflow <- FALSE
}
## Airflow p-value: 0.2906946
# Check for hour_of_day random effect
if ("hour_of_day" %in% names(analysis_data)) {
  cat("✓ Hour of day available for random effect\n")
  include_hour <- TRUE
} else {
  cat("⚠ Hour of day not available\n")
  include_hour <- FALSE
}
## ✓ Hour of day available for random effect
cat("\n")
# Fit full model with available covariates
cat("Fitting full model...\n")
## Fitting full model...
# Build model formula dynamically
formula_str <- "ch4_massflow_g_d ~ cow_id"
if (include_airflow) formula_str <- paste(formula_str, "+ average_airflow_l_s")

# Add random effects
if (include_hour) {
  formula_str <- paste(formula_str, "+ (1|hour_of_day) + (1|cow_id:hour_of_day)")
} else {
  formula_str <- paste(formula_str, "+ (1|cow_id)")
}

model_full <- lmer(as.formula(formula_str), data = analysis_data, REML = TRUE)

cat("Model formula:", formula_str, "\n\n")
## Model formula: ch4_massflow_g_d ~ cow_id + (1|hour_of_day) + (1|cow_id:hour_of_day)
cat("Full model summary:\n")
## Full model summary:
print(summary(model_full))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: as.formula(formula_str)
##    Data: analysis_data
## 
## REML criterion at convergence: 38289.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.6395 -0.6769 -0.0253  0.6384  3.3194 
## 
## Random effects:
##  Groups             Name        Variance Std.Dev.
##  cow_id:hour_of_day (Intercept)  153.4   12.39   
##  hour_of_day        (Intercept)  172.3   13.12   
##  Residual                       6552.9   80.95   
## Number of obs: 3306, groups:  cow_id:hour_of_day, 653; hour_of_day, 24
## 
## Fixed effects:
##             Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)  385.691      8.139  246.052  47.391  < 2e-16 ***
## cow_id258K  -130.992     11.096  330.907 -11.805  < 2e-16 ***
## cow_id260C   -82.946     11.009  402.525  -7.535 3.26e-13 ***
## cow_id260K   -71.297     10.962  337.201  -6.504 2.82e-10 ***
## cow_id264F   -91.543     11.123  309.641  -8.230 5.26e-15 ***
## cow_id266D   -43.687     12.332  435.373  -3.543 0.000439 ***
## cow_id266H   -29.537     11.686  400.720  -2.528 0.011870 *  
## cow_id270G    -3.675     10.931  371.103  -0.336 0.736922    
## cow_id286J   -91.940     11.422  334.290  -8.049 1.47e-14 ***
## cow_id290G   -21.712     11.898  394.518  -1.825 0.068771 .  
## cow_id296G   -73.356     11.325  398.841  -6.477 2.75e-10 ***
## cow_id296H   -20.850     11.537  404.146  -1.807 0.071454 .  
## cow_id306K  -161.194     12.504  377.676 -12.892  < 2e-16 ***
## cow_id312E   -51.101     11.766  347.853  -4.343 1.84e-05 ***
## cow_id312F   -39.587     10.529  361.750  -3.760 0.000198 ***
## cow_id314J   -63.659     10.928  297.529  -5.825 1.48e-08 ***
## cow_id322C   -56.089     13.346  599.940  -4.203 3.04e-05 ***
## cow_id322F   -53.113     11.569  347.590  -4.591 6.17e-06 ***
## cow_id326G   -58.247     11.618  404.316  -5.014 8.01e-07 ***
## cow_id332H   -38.410     11.549  431.695  -3.326 0.000957 ***
## cow_id338J   -62.249     11.382  322.590  -5.469 9.08e-08 ***
## cow_id364B   -34.789     12.388  482.972  -2.808 0.005184 ** 
## cow_id368J    -9.823     12.144  410.802  -0.809 0.419056    
## cow_id370F   -58.271     11.958  381.689  -4.873 1.62e-06 ***
## cow_id370H   -84.429     11.525  395.562  -7.326 1.34e-12 ***
## cow_id398G    -4.093     13.425  575.746  -0.305 0.760587    
## cow_id400G   -62.761     12.930  585.502  -4.854 1.55e-06 ***
## cow_id404E   -49.743     11.893  408.386  -4.183 3.53e-05 ***
## cow_id410C   -76.330     12.175  497.823  -6.269 7.87e-10 ***
## cow_id426D   -77.457     12.132  425.870  -6.385 4.50e-10 ***
## cow_id436E   -97.532     11.970  414.694  -8.148 4.40e-15 ***
## cow_id442B   -75.487     14.716  828.609  -5.130 3.62e-07 ***
## cow_id446K  -107.308     11.729  452.814  -9.149  < 2e-16 ***
## cow_id454C   -55.801     12.669  530.239  -4.405 1.28e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

2.2 2.2 Model Diagnostics

# Extract residuals and fitted values
residuals_df <- data.frame(
  fitted = fitted(model_full),
  residuals = residuals(model_full),
  standardized = residuals(model_full) / sd(residuals(model_full))
)

# Plot diagnostics
par(mfrow = c(2, 2))

# Residuals vs Fitted
plot(residuals_df$fitted, residuals_df$residuals, 
     main = "Residuals vs Fitted",
     xlab = "Fitted Values", ylab = "Residuals",
     pch = 16, col = rgb(0, 0, 1, 0.3))
abline(h = 0, lty = 2, col = "red")

# Q-Q plot
qqnorm(residuals_df$standardized, main = "Q-Q Plot of Residuals")
qqline(residuals_df$standardized, col = "red")

# Scale-Location plot
plot(residuals_df$fitted, sqrt(abs(residuals_df$standardized)),
     main = "Scale-Location Plot",
     xlab = "Fitted Values", ylab = expression(sqrt("|Standardized Residuals|")),
     pch = 16, col = rgb(0, 0, 1, 0.3))

# Histogram of residuals
hist(residuals_df$residuals, breaks = 50, main = "Distribution of Residuals",
     xlab = "Residuals", col = "steelblue", border = "black")

par(mfrow = c(1, 1))

# Shapiro-Wilk Test for Normality
shapiro_test <- shapiro.test(residuals_df$residuals)

cat("\n=== SHAPIRO-WILK TEST FOR NORMALITY ===\n")
## 
## === SHAPIRO-WILK TEST FOR NORMALITY ===
cat("Test Statistic (W):", round(shapiro_test$statistic, 6), "\n")
## Test Statistic (W): 0.998546
cat("P-value:", format(shapiro_test$p.value, scientific = TRUE), "\n")
## P-value: 4.79554e-03
if (shapiro_test$p.value > 0.05) {
  cat("Result: Residuals ARE normally distributed (p > 0.05)\n")
} else {
  cat("Result: Residuals are NOT normally distributed (p < 0.05)\n")
}
## Result: Residuals are NOT normally distributed (p < 0.05)
cat("Interpretation: The Shapiro-Wilk test assesses whether residuals follow a normal distribution.\n")
## Interpretation: The Shapiro-Wilk test assesses whether residuals follow a normal distribution.
cat("This is an important assumption for mixed models and linear regression.\n\n")
## This is an important assumption for mixed models and linear regression.
# Box-Cox Transformation Analysis (R equivalent to SAS transformation recommendations)
library(MASS)

cat("=== BOX-COX TRANSFORMATION ANALYSIS ===\n")
## === BOX-COX TRANSFORMATION ANALYSIS ===
cat("(Equivalent to SAS transformation diagnostics)\n\n")
## (Equivalent to SAS transformation diagnostics)
# Prepare data for Box-Cox (must be positive)
ch4_positive <- analysis_data$ch4_massflow_g_d
if (any(ch4_positive <= 0)) {
  ch4_positive <- ch4_positive + abs(min(ch4_positive)) + 1
}

# Perform Box-Cox transformation
boxcox_result <- boxcox(ch4_positive ~ 1, lambda = seq(-2, 2, by = 0.1))

# Extract optimal lambda
optimal_lambda <- boxcox_result$x[which.max(boxcox_result$y)]
cat("Optimal Lambda (λ):", round(optimal_lambda, 4), "\n\n")
## Optimal Lambda (λ): 0.8687
# Interpretation
cat("Transformation Recommendations:\n")
## Transformation Recommendations:
if (optimal_lambda > 0.8 && optimal_lambda < 1.2) {
  cat("  → No transformation needed (λ ≈ 1 = original scale)\n")
} else if (optimal_lambda > -0.2 && optimal_lambda < 0.2) {
  cat("  → Use LOG TRANSFORMATION (λ = 0)\n")
} else if (optimal_lambda > 0.4 && optimal_lambda < 0.6) {
  cat("  → Use SQUARE ROOT TRANSFORMATION (λ = 0.5)\n")
} else if (optimal_lambda < 0) {
  cat("  → Use RECIPROCAL or INVERSE TRANSFORMATION (λ = -1 or λ = ", round(optimal_lambda, 2), ")\n")
} else {
  cat("  → Use POWER TRANSFORMATION with λ = ", round(optimal_lambda, 4), "\n")
}
##   → No transformation needed (λ ≈ 1 = original scale)
cat("\nCommon Box-Cox Interpretations:\n")
## 
## Common Box-Cox Interpretations:
cat("  λ = -1  :  1/y (reciprocal)\n")
##   λ = -1  :  1/y (reciprocal)
cat("  λ = -0.5:  1/sqrt(y)\n")
##   λ = -0.5:  1/sqrt(y)
cat("  λ = 0   :  log(y) (natural logarithm)\n")
##   λ = 0   :  log(y) (natural logarithm)
cat("  λ = 0.5 :  sqrt(y) (square root)\n")
##   λ = 0.5 :  sqrt(y) (square root)
cat("  λ = 1   :  No transformation (y)\n")
##   λ = 1   :  No transformation (y)

3 3. Extract LSMEANS (Least Squares Means)

3.1 3.1 Calculate LSMEANS for Each Animal

# Calculate LSMEANS for each cow
emmeans_result <- emmeans(model_full, specs = "cow_id")

# Convert to dataframe
lsmeans_df <- as.data.frame(emmeans_result)

# Check column names
cat("Columns in emmeans result:\n")
## Columns in emmeans result:
print(names(lsmeans_df))
## [1] "cow_id"    "emmean"    "SE"        "df"        "asymp.LCL" "asymp.UCL"
cat("\n")
# Rename columns if they exist - be flexible
if ("emmean" %in% names(lsmeans_df)) {
  lsmeans_df <- lsmeans_df %>% rename(lsmeans_ch4 = emmean)
} else if ("estimate" %in% names(lsmeans_df)) {
  lsmeans_df <- lsmeans_df %>% rename(lsmeans_ch4 = estimate)
}

if ("SE" %in% names(lsmeans_df)) {
  lsmeans_df <- lsmeans_df %>% rename(se_lsmeans = SE)
} else if ("se" %in% names(lsmeans_df)) {
  lsmeans_df <- lsmeans_df %>% rename(se_lsmeans = se)
}

# Rename CI columns if they exist
if ("asymp.LCL" %in% names(lsmeans_df)) {
  lsmeans_df <- lsmeans_df %>% rename(ci_lower = asymp.LCL, ci_upper = asymp.UCL)
}

# Add treatment information
lsmeans_df <- lsmeans_df %>%
  left_join(
    analysis_data %>% distinct(cow_id, grazing_treatment),
    by = "cow_id"
  )

cat("LSMEANS results (first 10 rows):\n")
## LSMEANS results (first 10 rows):
print(head(lsmeans_df, 10))
##  cow_id lsmeans_ch4 se_lsmeans  df ci_lower ci_upper grazing_treatment
##  252H      385.6910   8.138539 Inf 369.7397 401.6422 K2_CON           
##  258K      254.6987   8.558034 Inf 237.9253 271.4722 K1_AMP           
##  260C      302.7453   8.407833 Inf 286.2662 319.2243 K2_CON           
##  260K      314.3936   8.373252 Inf 297.9823 330.8049 K2_CON           
##  264F      294.1481   8.595011 Inf 277.3022 310.9940 K1_AMP           
##  266D      342.0039  10.139652 Inf 322.1305 361.8772 K2_CON           
##  266H      356.1535   9.318966 Inf 337.8887 374.4184 K1_AMP           
##  270G      382.0163   8.302880 Inf 365.7430 398.2897 K2_CON           
##  286J      293.7513   8.979321 Inf 276.1522 311.3505 K1_AMP           
##  290G      363.9788   9.575201 Inf 345.2117 382.7458 K1_AMP           
## 
## Degrees-of-freedom method: asymptotic 
## Confidence level used: 0.95
cat("\nLSMEANS Summary Statistics by Treatment:\n")
## 
## LSMEANS Summary Statistics by Treatment:
lsmeans_summary <- lsmeans_df %>%
  group_by(grazing_treatment) %>%
  summarise(
    n_cows = n(),
    mean_lsmeans = mean(lsmeans_ch4),
    sd_lsmeans = sd(lsmeans_ch4),
    se_lsmeans = mean(se_lsmeans),
    min_lsmeans = min(lsmeans_ch4),
    max_lsmeans = max(lsmeans_ch4),
    .groups = 'drop'
  )

print(lsmeans_summary)
## # A tibble: 2 × 7
##   grazing_treatment n_cows mean_lsmeans sd_lsmeans se_lsmeans min_lsmeans
##   <fct>              <int>        <dbl>      <dbl>      <dbl>       <dbl>
## 1 K1_AMP                18         314.       34.7       9.53        224.
## 2 K2_CON                16         339.       32.2       9.46        278.
## # ℹ 1 more variable: max_lsmeans <dbl>

3.2 3.2 Calculate Arithmetic Means for Comparison

# Calculate arithmetic means for each cow
arithmetic_means <- analysis_data %>%
  group_by(cow_id, grazing_treatment) %>%
  summarise(
    n_obs = n(),
    arithmetic_mean = mean(ch4_massflow_g_d),
    sd_arithmetic = sd(ch4_massflow_g_d),
    se_arithmetic = sd(ch4_massflow_g_d) / sqrt(n()),
    min_ch4 = min(ch4_massflow_g_d),
    max_ch4 = max(ch4_massflow_g_d),
    .groups = 'drop'
  )

cat("Arithmetic Means (first 10 rows):\n")
## Arithmetic Means (first 10 rows):
print(head(arithmetic_means, 10))
## # A tibble: 10 × 8
##    cow_id grazing_treatment n_obs arithmetic_mean sd_arithmetic se_arithmetic
##    <fct>  <fct>             <int>           <dbl>         <dbl>         <dbl>
##  1 252H   K2_CON              134            382.          83.4          7.21
##  2 258K   K1_AMP              121            251.          75.8          6.89
##  3 260C   K2_CON              120            300.          83.6          7.63
##  4 260K   K2_CON              126            309.          67.6          6.02
##  5 264F   K1_AMP              122            290.          69.3          6.27
##  6 266D   K2_CON               80            337.          98.6         11.0 
##  7 266H   K1_AMP               97            351.          89.1          9.05
##  8 270G   K2_CON              125            379.          83.3          7.45
##  9 286J   K1_AMP              109            290.          88.0          8.43
## 10 290G   K1_AMP               92            359.          87.1          9.08
## # ℹ 2 more variables: min_ch4 <dbl>, max_ch4 <dbl>
cat("\nArithmetic Means Summary Statistics by Treatment:\n")
## 
## Arithmetic Means Summary Statistics by Treatment:
arithmetic_summary <- arithmetic_means %>%
  group_by(grazing_treatment) %>%
  summarise(
    n_cows = n(),
    mean_arithmetic = mean(arithmetic_mean),
    sd_arithmetic_mean = sd(arithmetic_mean),
    se_arithmetic_mean = mean(se_arithmetic),
    min_arithmetic = min(arithmetic_mean),
    max_arithmetic = max(arithmetic_mean),
    .groups = 'drop'
  )

print(arithmetic_summary)
## # A tibble: 2 × 7
##   grazing_treatment n_cows mean_arithmetic sd_arithmetic_mean se_arithmetic_mean
##   <fct>              <int>           <dbl>              <dbl>              <dbl>
## 1 K1_AMP                18            309.               34.7               8.60
## 2 K2_CON                16            335.               32.1               8.70
## # ℹ 2 more variables: min_arithmetic <dbl>, max_arithmetic <dbl>

4 4. Comparison: LSMEANS vs Arithmetic Mean

4.1 4.1 Side-by-Side Comparison

# Merge LSMEANS and arithmetic means
# Select only columns that exist
cols_to_select <- c("cow_id", "grazing_treatment", "lsmeans_ch4", "se_lsmeans")
cols_to_select <- cols_to_select[cols_to_select %in% names(lsmeans_df)]

# Add CI columns if they exist
if ("ci_lower" %in% names(lsmeans_df)) cols_to_select <- c(cols_to_select, "ci_lower")
if ("ci_upper" %in% names(lsmeans_df)) cols_to_select <- c(cols_to_select, "ci_upper")

comparison_df <- lsmeans_df %>%
  dplyr::select(any_of(c("cow_id", "grazing_treatment", "lsmeans_ch4", "se_lsmeans", "ci_lower", "ci_upper"))) %>%
  left_join(
    arithmetic_means %>% dplyr::select(cow_id, arithmetic_mean, se_arithmetic, n_obs),
    by = "cow_id"
  ) %>%
  mutate(
    difference = lsmeans_ch4 - arithmetic_mean,
    percent_difference = (difference / arithmetic_mean) * 100,
    abs_percent_diff = abs(percent_difference)
  )

cat("Method Comparison (first 15 cows):\n")
## Method Comparison (first 15 cows):
print(head(comparison_df, 15) %>%
  dplyr::select(cow_id, grazing_treatment, lsmeans_ch4, arithmetic_mean, 
         difference, percent_difference, n_obs))
##    cow_id grazing_treatment lsmeans_ch4 arithmetic_mean difference
## 1    252H            K2_CON    385.6910        382.2435   3.447495
## 2    258K            K1_AMP    254.6987        250.6393   4.059478
## 3    260C            K2_CON    302.7453        299.8183   2.926987
## 4    260K            K2_CON    314.3936        309.3664   5.027201
## 5    264F            K1_AMP    294.1481        289.7465   4.401663
## 6    266D            K2_CON    342.0039        337.1750   4.828904
## 7    266H            K1_AMP    356.1535        350.9666   5.186968
## 8    270G            K2_CON    382.0163        379.1610   2.855334
## 9    286J            K1_AMP    293.7513        289.9971   3.754251
## 10   290G            K1_AMP    363.9788        358.5515   5.427311
## 11   296G            K2_CON    312.3353        307.6448   4.690512
## 12   296H            K2_CON    364.8406        359.2441   5.596501
## 13   306K            K1_AMP    224.4965        219.5593   4.937180
## 14   312E            K1_AMP    334.5905        329.7435   4.846931
## 15   312F            K2_CON    346.1040        344.4250   1.679013
##    percent_difference n_obs
## 1           0.9019107   134
## 2           1.6196497   121
## 3           0.9762536   120
## 4           1.6249989   126
## 5           1.5191429   122
## 6           1.4321656    80
## 7           1.4779096    97
## 8           0.7530663   125
## 9           1.2945823   109
## 10          1.5136769    92
## 11          1.5246519   108
## 12          1.5578548   101
## 13          2.2486769    79
## 14          1.4699093    98
## 15          0.4874828   146
cat("\n=== DIFFERENCE SUMMARY BY TREATMENT ===\n")
## 
## === DIFFERENCE SUMMARY BY TREATMENT ===
difference_summary <- comparison_df %>%
  group_by(grazing_treatment) %>%
  summarise(
    n_cows = n(),
    mean_lsmeans = mean(lsmeans_ch4),
    mean_arithmetic = mean(arithmetic_mean),
    mean_difference = mean(difference),
    mean_percent_diff = mean(percent_difference),
    sd_percent_diff = sd(percent_difference),
    max_abs_percent_diff = max(abs_percent_diff),
    .groups = 'drop'
  )

print(difference_summary)
## # A tibble: 2 × 8
##   grazing_treatment n_cows mean_lsmeans mean_arithmetic mean_difference
##   <fct>              <int>        <dbl>           <dbl>           <dbl>
## 1 K1_AMP                18         314.            309.            4.23
## 2 K2_CON                16         339.            335.            4.69
## # ℹ 3 more variables: mean_percent_diff <dbl>, sd_percent_diff <dbl>,
## #   max_abs_percent_diff <dbl>

4.2 4.2 Overall Comparison Statistics

cat("OVERALL COMPARISON STATISTICS:\n\n")
## OVERALL COMPARISON STATISTICS:
cat("Mean LSMEANS across all cows:", mean(comparison_df$lsmeans_ch4), "\n")
## Mean LSMEANS across all cows: 325.7201
cat("Mean Arithmetic Mean across all cows:", mean(comparison_df$arithmetic_mean), "\n")
## Mean Arithmetic Mean across all cows: 321.2739
cat("Mean Difference:", mean(comparison_df$difference), "\n")
## Mean Difference: 4.446239
cat("Mean Percent Difference:", mean(comparison_df$percent_difference), "%\n")
## Mean Percent Difference: 1.399553 %
cat("SD of Percent Difference:", sd(comparison_df$percent_difference), "%\n")
## SD of Percent Difference: 0.3723327 %
cat("Range of Percent Difference:", 
    min(comparison_df$percent_difference), "% to", 
    max(comparison_df$percent_difference), "%\n\n")
## Range of Percent Difference: 0.4874828 % to 2.248677 %
# Statistical test: paired comparison
t_test <- t.test(comparison_df$lsmeans_ch4, comparison_df$arithmetic_mean, paired = TRUE)
cat("Paired t-test (LSMEANS vs Arithmetic Mean):\n")
## Paired t-test (LSMEANS vs Arithmetic Mean):
cat("  t-statistic:", t_test$statistic, "\n")
##   t-statistic: 22.67372
cat("  p-value:", t_test$p.value, "\n")
##   p-value: 1.070343e-21
cat("  Conclusion:", ifelse(t_test$p.value < 0.05, 
                            "SIGNIFICANT difference", 
                            "NO significant difference"), "\n")
##   Conclusion: SIGNIFICANT difference

5 5. 24-Hour Diurnal Patterns: AM vs LSMEANS

5.1 5.1 Individual Cow 24-Hour Patterns (5 Random Cows)

# Set seed for reproducibility
set.seed(42)

# Select 5 random cows
random_cows <- sample(unique(analysis_data$cow_id), size = 5, replace = FALSE)

cat("Selected 5 random cows for 24-hour comparison:\n")
## Selected 5 random cows for 24-hour comparison:
for (i in seq_along(random_cows)) {
  treatment <- analysis_data %>% 
    filter(cow_id == random_cows[i]) %>% 
    pull(grazing_treatment) %>% 
    first()
  cat(i, ".", as.character(random_cows[i]), "(", as.character(treatment), ")\n")
}
## 1 . 252H ( K2_CON )
## 2 . 322C ( K1_AMP )
## 3 . 332H ( K2_CON )
## 4 . 266D ( K2_CON )
## 5 . 264F ( K1_AMP )
cat("\n")
# For each cow, create a 24-hour plot
for (cow in random_cows) {
  # Get treatment for this cow
  cow_treatment <- analysis_data %>% 
    filter(cow_id == cow) %>% 
    pull(grazing_treatment) %>% 
    first()
  
  # Calculate hourly Arithmetic Mean for this cow
  hourly_am <- analysis_data %>%
    filter(cow_id == cow) %>%
    group_by(hour_of_day) %>%
    summarise(
      am_ch4 = mean(ch4_massflow_g_d, na.rm = TRUE),
      n_obs = n(),
      .groups = 'drop'
    )
  
  # Calculate LSMEANS for this cow across all hours
  # LSMEANS = fixed effect for this cow + hour effect + average random effects
  hourly_lsmeans <- data.frame(
    hour_of_day = 0:23,
    cow_id = cow,
    grazing_treatment = cow_treatment
  )
  
  # Get predictions from model (using population average for random effects)
  hourly_lsmeans$lsmeans_ch4 <- predict(model_full, 
                                         newdata = hourly_lsmeans, 
                                         re.form = ~(1|hour_of_day), 
                                         allow.new.levels = TRUE)
  
  # Merge AM and LSMEANS
  plot_data <- hourly_am %>%
    left_join(hourly_lsmeans %>% dplyr::select(hour_of_day, lsmeans_ch4), 
              by = "hour_of_day") %>%
    pivot_longer(cols = c(am_ch4, lsmeans_ch4),
                 names_to = "method",
                 values_to = "ch4") %>%
    mutate(method = factor(method, levels = c("am_ch4", "lsmeans_ch4"),
                           labels = c("Arithmetic Mean", "LSMEANS")))
  
  # Create plot
  p <- ggplot(plot_data, aes(x = hour_of_day, y = ch4, color = method, linetype = method)) +
    geom_line(size = 1.2) +
    geom_point(size = 2.5, aes(fill = method), shape = 21, color = "white") +
    scale_x_continuous(breaks = seq(0, 23, 2), limits = c(-0.5, 23.5)) +
    scale_color_manual(values = c("Arithmetic Mean" = "#1f77b4", "LSMEANS" = "#ff7f0e")) +
    scale_linetype_manual(values = c("Arithmetic Mean" = "solid", "LSMEANS" = "dashed")) +
    scale_fill_manual(values = c("Arithmetic Mean" = "#1f77b4", "LSMEANS" = "#ff7f0e")) +
    labs(
      title = paste("Cow", cow, "-", cow_treatment),
      subtitle = "24-Hour CH4 Pattern: Arithmetic Mean vs LSMEANS",
      x = "Hour of Day",
      y = "CH4 Emissions (g/d)",
      color = "Method",
      linetype = "Method",
      fill = "Method"
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(face = "bold", size = 13),
      plot.subtitle = element_text(size = 10, color = "gray60"),
      axis.text.x = element_text(angle = 45, hjust = 1),
      legend.position = "right",
      panel.grid.major = element_line(color = "gray90"),
      panel.grid.minor = element_line(color = "gray95")
    )
  
  print(p)
  cat("\n")
}

cat("\n=== INDIVIDUAL COW SUMMARY ===\n")
## 
## === INDIVIDUAL COW SUMMARY ===
cat("This section shows 5 randomly selected cows.\n")
## This section shows 5 randomly selected cows.
cat("- SOLID line = Arithmetic Mean (calculated from actual measurements)\n")
## - SOLID line = Arithmetic Mean (calculated from actual measurements)
cat("- DASHED line = LSMEANS (from mixed model, accounting for diurnal variation)\n")
## - DASHED line = LSMEANS (from mixed model, accounting for diurnal variation)
cat("- Both methods account for all 24 hours of data\n")
## - Both methods account for all 24 hours of data
cat("- The difference between them shows how much diurnal adjustment occurs\n")
## - The difference between them shows how much diurnal adjustment occurs

5.2 5.2 Herd-Level 24-Hour Comparison: K1_AMP vs K2_CON

# Calculate hourly means by treatment (Arithmetic Mean)
hourly_treatment_am <- analysis_data %>%
  group_by(grazing_treatment, hour_of_day) %>%
  summarise(
    am_ch4 = mean(ch4_massflow_g_d, na.rm = TRUE),
    n_obs = n(),
    .groups = 'drop'
  )

# Calculate LSMEANS for each treatment by hour
# Create prediction data for both treatments across all hours
pred_data <- expand_grid(
  cow_id = unique(analysis_data$cow_id)[1],  # Use first cow as template
  grazing_treatment = c("K1_AMP", "K2_CON"),
  hour_of_day = 0:23
)

# Get LSMEANS predictions
pred_data$lsmeans_ch4 <- predict(model_full, 
                                  newdata = pred_data, 
                                  re.form = ~(1|hour_of_day),
                                  allow.new.levels = TRUE)

# Summarize LSMEANS by treatment and hour
hourly_treatment_lsmeans <- pred_data %>%
  group_by(grazing_treatment, hour_of_day) %>%
  summarise(
    lsmeans_ch4 = mean(lsmeans_ch4, na.rm = TRUE),
    .groups = 'drop'
  )

# Merge AM and LSMEANS
hourly_treatment_comparison <- hourly_treatment_am %>%
  left_join(hourly_treatment_lsmeans, by = c("grazing_treatment", "hour_of_day"))

# Pivot for ggplot
plot_data_herd <- hourly_treatment_comparison %>%
  pivot_longer(cols = c(am_ch4, lsmeans_ch4),
               names_to = "method",
               values_to = "ch4") %>%
  mutate(method = factor(method, levels = c("am_ch4", "lsmeans_ch4"),
                         labels = c("Arithmetic Mean", "LSMEANS")))

# Create herd-level plot
p_herd <- ggplot(plot_data_herd, aes(x = hour_of_day, y = ch4, color = grazing_treatment, linetype = method)) +
  geom_line(size = 1.3) +
  geom_point(size = 2.5, aes(fill = grazing_treatment), shape = 21, color = "white") +
  scale_x_continuous(breaks = seq(0, 23, 2), limits = c(-0.5, 23.5)) +
  scale_color_manual(values = c("K1_AMP" = "forestgreen", "K2_CON" = "steelblue"),
                     labels = c("K1_AMP" = "K1_AMP (Adaptive Multipaddock)", "K2_CON" = "K2_CON (Control)")) +
  scale_linetype_manual(values = c("Arithmetic Mean" = "solid", "LSMEANS" = "dashed")) +
  scale_fill_manual(values = c("K1_AMP" = "forestgreen", "K2_CON" = "steelblue")) +
  labs(
    title = "Herd-Level 24-Hour CH4 Pattern: K1_AMP vs K2_CON",
    subtitle = "Solid lines = Arithmetic Mean | Dashed lines = LSMEANS",
    x = "Hour of Day",
    y = "Average CH4 Emissions (g/d)",
    color = "Treatment",
    linetype = "Method",
    fill = "Treatment"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(size = 11, color = "gray60"),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "right",
    panel.grid.major = element_line(color = "gray90"),
    panel.grid.minor = element_line(color = "gray95")
  )

print(p_herd)

# Print detailed summary statistics
cat("\n=== HERD-LEVEL 24-HOUR SUMMARY ===\n\n")
## 
## === HERD-LEVEL 24-HOUR SUMMARY ===
# K1_AMP
k1_am <- hourly_treatment_am %>% filter(grazing_treatment == "K1_AMP")
k1_lsm <- hourly_treatment_lsmeans %>% filter(grazing_treatment == "K1_AMP")

cat("K1_AMP (Adaptive Multipaddock):\n")
## K1_AMP (Adaptive Multipaddock):
cat("  Arithmetic Mean:\n")
##   Arithmetic Mean:
cat("    Mean CH4:", round(mean(k1_am$am_ch4, na.rm=TRUE), 2), "g/d\n")
##     Mean CH4: 305.39 g/d
cat("    Range:", round(min(k1_am$am_ch4, na.rm=TRUE), 2), "-", round(max(k1_am$am_ch4, na.rm=TRUE), 2), "g/d\n")
##     Range: 258.75 - 355.5 g/d
cat("    Peak hour:", k1_am$hour_of_day[which.max(k1_am$am_ch4)], 
    "(" , round(max(k1_am$am_ch4, na.rm=TRUE), 2), "g/d)\n")
##     Peak hour: 1 ( 355.5 g/d)
cat("  LSMEANS:\n")
##   LSMEANS:
cat("    Mean CH4:", round(mean(k1_lsm$lsmeans_ch4, na.rm=TRUE), 2), "g/d\n")
##     Mean CH4: 385.69 g/d
cat("    Range:", round(min(k1_lsm$lsmeans_ch4, na.rm=TRUE), 2), "-", round(max(k1_lsm$lsmeans_ch4, na.rm=TRUE), 2), "g/d\n")
##     Range: 370.14 - 407.95 g/d
cat("    Peak hour:", k1_lsm$hour_of_day[which.max(k1_lsm$lsmeans_ch4)],
    "(" , round(max(k1_lsm$lsmeans_ch4, na.rm=TRUE), 2), "g/d)\n\n")
##     Peak hour: 0 ( 407.95 g/d)
# K2_CON
k2_am <- hourly_treatment_am %>% filter(grazing_treatment == "K2_CON")
k2_lsm <- hourly_treatment_lsmeans %>% filter(grazing_treatment == "K2_CON")

cat("K2_CON (Control):\n")
## K2_CON (Control):
cat("  Arithmetic Mean:\n")
##   Arithmetic Mean:
cat("    Mean CH4:", round(mean(k2_am$am_ch4, na.rm=TRUE), 2), "g/d\n")
##     Mean CH4: 346.92 g/d
cat("    Range:", round(min(k2_am$am_ch4, na.rm=TRUE), 2), "-", round(max(k2_am$am_ch4, na.rm=TRUE), 2), "g/d\n")
##     Range: 283.95 - 404.77 g/d
cat("    Peak hour:", k2_am$hour_of_day[which.max(k2_am$am_ch4)],
    "(" , round(max(k2_am$am_ch4, na.rm=TRUE), 2), "g/d)\n")
##     Peak hour: 0 ( 404.77 g/d)
cat("  LSMEANS:\n")
##   LSMEANS:
cat("    Mean CH4:", round(mean(k2_lsm$lsmeans_ch4, na.rm=TRUE), 2), "g/d\n")
##     Mean CH4: 385.69 g/d
cat("    Range:", round(min(k2_lsm$lsmeans_ch4, na.rm=TRUE), 2), "-", round(max(k2_lsm$lsmeans_ch4, na.rm=TRUE), 2), "g/d\n")
##     Range: 370.14 - 407.95 g/d
cat("    Peak hour:", k2_lsm$hour_of_day[which.max(k2_lsm$lsmeans_ch4)],
    "(" , round(max(k2_lsm$lsmeans_ch4, na.rm=TRUE), 2), "g/d)\n\n")
##     Peak hour: 0 ( 407.95 g/d)
cat("Treatment Difference (K2_CON - K1_AMP):\n")
## Treatment Difference (K2_CON - K1_AMP):
cat("  AM Mean difference:", round(mean(k2_am$am_ch4, na.rm=TRUE) - mean(k1_am$am_ch4, na.rm=TRUE), 2), "g/d\n")
##   AM Mean difference: 41.53 g/d
cat("  LSMEANS Mean difference:", round(mean(k2_lsm$lsmeans_ch4, na.rm=TRUE) - mean(k1_lsm$lsmeans_ch4, na.rm=TRUE), 2), "g/d\n")
##   LSMEANS Mean difference: 0 g/d

6 5. Visualizations

6.1 5.1 LSMEANS vs Arithmetic Mean Scatter Plot

# Create scatter plot
p <- ggplot(comparison_df, aes(x = arithmetic_mean, y = lsmeans_ch4, 
                          color = grazing_treatment, size = n_obs)) +
  geom_point(alpha = 0.6) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "red", size = 1)

# Add error bars if CI columns exist
if ("ci_lower" %in% names(comparison_df) && "ci_upper" %in% names(comparison_df)) {
  p <- p + geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 1, alpha = 0.3)
}

p <- p +
  labs(
    title = "LSMEANS vs Arithmetic Mean CH4 Emissions",
    subtitle = "Points on diagonal line indicate perfect agreement",
    x = "Arithmetic Mean (g/d)",
    y = "LSMEANS (g/d)",
    color = "Treatment",
    size = "N Observations"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(size = 11, color = "gray50")
  )

print(p)

6.2 5.2 Percent Difference Distribution

ggplot(comparison_df, aes(x = percent_difference, fill = grazing_treatment)) +
  geom_histogram(bins = 30, alpha = 0.7, position = "dodge") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "red", size = 1) +
  geom_vline(xintercept = mean(comparison_df$percent_difference), 
             linetype = "solid", color = "blue", size = 1) +
  labs(
    title = "Distribution of Percent Difference: (LSMEANS - Arithmetic Mean) / Arithmetic Mean",
    subtitle = "Red dashed line = 0%, Blue solid line = mean difference",
    x = "Percent Difference (%)",
    y = "Frequency",
    fill = "Treatment"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 12),
    plot.subtitle = element_text(size = 10, color = "gray50")
  )

6.3 5.3 Box Plot Comparison by Treatment

comparison_long <- comparison_df %>%
  pivot_longer(
    cols = c(lsmeans_ch4, arithmetic_mean),
    names_to = "method",
    values_to = "ch4_value"
  ) %>%
  mutate(method = factor(method, levels = c("arithmetic_mean", "lsmeans_ch4"),
                         labels = c("Arithmetic Mean", "LSMEANS")))

ggplot(comparison_long, aes(x = grazing_treatment, y = ch4_value, fill = method)) +
  geom_boxplot(alpha = 0.7) +
  labs(
    title = "CH4 Estimates: LSMEANS vs Arithmetic Mean by Treatment",
    x = "Grazing Treatment",
    y = "CH4 Emissions (g/d)",
    fill = "Method"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

6.4 5.4 Confidence Interval Comparison

# Only create this plot if CI columns exist
if ("ci_lower" %in% names(comparison_df) && "ci_upper" %in% names(comparison_df)) {
  # Sort by LSMEANS value for better visualization
  comparison_sorted <- comparison_df %>%
    arrange(lsmeans_ch4)

  p <- ggplot(comparison_sorted, aes(x = reorder(cow_id, lsmeans_ch4))) +
    geom_point(aes(y = arithmetic_mean, color = "Arithmetic Mean"), size = 2) +
    geom_point(aes(y = lsmeans_ch4, color = "LSMEANS"), size = 2) +
    geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper, color = "LSMEANS"), 
                  width = 0.3, alpha = 0.5) +
    labs(
      title = "CH4 Estimates with 95% Confidence Intervals (LSMEANS)",
      subtitle = "Points show estimates, error bars show 95% CI for LSMEANS",
      x = "Animal ID (sorted by LSMEANS)",
      y = "CH4 Emissions (g/d)",
      color = "Method"
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(face = "bold", size = 12),
      axis.text.x = element_text(angle = 90, size = 7),
      legend.position = "right"
    )
  
  print(p)
} else {
  cat("Confidence interval columns not available for this plot\n")
}


7 6. Summary and Conclusions

7.1 6.1 Key Findings

cat("=== LSMEANS MIXED MODEL ANALYSIS: SUMMARY ===\n\n")
## === LSMEANS MIXED MODEL ANALYSIS: SUMMARY ===
cat("1. DATASET CHARACTERISTICS:\n")
## 1. DATASET CHARACTERISTICS:
cat("   - Initial observations (3SD filtered):", nrow(raw_data), "\n")
##    - Initial observations (3SD filtered): 3355
cat("   - Final analysis observations (≥20 visits):", nrow(analysis_data), "\n")
##    - Final analysis observations (≥20 visits): 3306
cat("   - Number of animals analyzed:", n_distinct(analysis_data$cow_id), "\n")
##    - Number of animals analyzed: 34
cat("   - Grazing treatments:", paste(unique(analysis_data$grazing_treatment), collapse = ", "), "\n\n")
##    - Grazing treatments: K2_CON, K1_AMP
cat("2. LSMEANS RESULTS:\n")
## 2. LSMEANS RESULTS:
cat("   - Mean LSMEANS CH4:", round(mean(comparison_df$lsmeans_ch4), 2), "g/d\n")
##    - Mean LSMEANS CH4: 325.72 g/d
cat("   - SD of LSMEANS:", round(sd(comparison_df$lsmeans_ch4), 2), "g/d\n")
##    - SD of LSMEANS: 35.51 g/d
cat("   - Range:", round(min(comparison_df$lsmeans_ch4), 2), "to", 
    round(max(comparison_df$lsmeans_ch4), 2), "g/d\n\n")
##    - Range: 224.5 to 385.69 g/d
cat("3. COMPARISON WITH ARITHMETIC MEAN:\n")
## 3. COMPARISON WITH ARITHMETIC MEAN:
cat("   - Mean Arithmetic Mean:", round(mean(comparison_df$arithmetic_mean), 2), "g/d\n")
##    - Mean Arithmetic Mean: 321.27 g/d
cat("   - Mean Difference (LSMEANS - AM):", round(mean(comparison_df$difference), 2), "g/d\n")
##    - Mean Difference (LSMEANS - AM): 4.45 g/d
cat("   - Mean Percent Difference:", round(mean(comparison_df$percent_difference), 3), "%\n")
##    - Mean Percent Difference: 1.4 %
cat("   - Conclusion: ",
    ifelse(abs(mean(comparison_df$percent_difference)) < 1,
           "LSMEANS and arithmetic mean are practically equivalent (< 1% difference)",
           "Notable difference between methods"), "\n\n")
##    - Conclusion:  Notable difference between methods
cat("4. STATISTICAL SIGNIFICANCE:\n")
## 4. STATISTICAL SIGNIFICANCE:
cat("   - Paired t-test p-value:", round(t_test$p.value, 4), "\n")
##    - Paired t-test p-value: 0
cat("   - Result:", ifelse(t_test$p.value < 0.05,
                           "Methods produce significantly different estimates",
                           "No significant difference between methods"), "\n\n")
##    - Result: Methods produce significantly different estimates
cat("5. METHODOLOGICAL ADVANTAGES OF LSMEANS:\n")
## 5. METHODOLOGICAL ADVANTAGES OF LSMEANS:
cat("   ✓ Uses all observations (no data loss)\n")
##    ✓ Uses all observations (no data loss)
cat("   ✓ Statistically accounts for diurnal variation\n")
##    ✓ Statistically accounts for diurnal variation
cat("   ✓ Provides standard errors for animal-level estimates\n")
##    ✓ Provides standard errors for animal-level estimates
cat("   ✓ Allows weighted analysis based on estimation uncertainty\n")
##    ✓ Allows weighted analysis based on estimation uncertainty
cat("   ✓ In grazing data: equivalent to arithmetic mean (literature: 0.1% diff)\n")
##    ✓ In grazing data: equivalent to arithmetic mean (literature: 0.1% diff)
cat("   ✓ Maintains methodological consistency across different trial types\n")
##    ✓ Maintains methodological consistency across different trial types

7.2 6.2 Recommendations

cat("=== RECOMMENDATIONS FOR FUTURE ANALYSIS ===\n\n")
## === RECOMMENDATIONS FOR FUTURE ANALYSIS ===
cat("1. Use LSMEANS as the primary method for CH4 estimation because:\n")
## 1. Use LSMEANS as the primary method for CH4 estimation because:
cat("   - Provides proper statistical accounting for diurnal variation\n")
##    - Provides proper statistical accounting for diurnal variation
cat("   - Maintains all observations without arbitrary binning\n")
##    - Maintains all observations without arbitrary binning
cat("   - Generates uncertainty estimates for each animal\n")
##    - Generates uncertainty estimates for each animal
cat("   - Enables weighted analyses if needed\n\n")
##    - Enables weighted analyses if needed
cat("2. For comparison purposes:\n")
## 2. For comparison purposes:
cat("   - Report both LSMEANS and arithmetic mean estimates\n")
##    - Report both LSMEANS and arithmetic mean estimates
cat("   - Document the < 1% difference in grazing data\n")
##    - Document the < 1% difference in grazing data
cat("   - Use LSMEANS standard errors for downstream statistical tests\n\n")
##    - Use LSMEANS standard errors for downstream statistical tests
cat("3. Next steps:\n")
## 3. Next steps:
cat("   - Apply weighted analysis using LSMEANS SE if residual SD improves\n")
##    - Apply weighted analysis using LSMEANS SE if residual SD improves
cat("   - Compare with time-of-day method results\n")
##    - Compare with time-of-day method results
cat("   - Validate model fit with additional diagnostic plots\n")
##    - Validate model fit with additional diagnostic plots
cat("   - Consider animal-level covariates (e.g., weight, breed) if available\n")
##    - Consider animal-level covariates (e.g., weight, breed) if available

8 7. Appendix: Model Specification Details

8.1 7.1 Complete Model Formula

cat("Mixed Model Specification:\n\n")
## Mixed Model Specification:
cat("CH4_massflow_g_d ~ cow_id + airflow + visit_duration + (1|date) + (1|cow_id:hour_of_day)\n\n")
## CH4_massflow_g_d ~ cow_id + airflow + visit_duration + (1|date) + (1|cow_id:hour_of_day)
cat("Where:\n")
## Where:
cat("  - CH4_massflow_g_d: Methane emissions (g/day) - DEPENDENT VARIABLE\n")
##   - CH4_massflow_g_d: Methane emissions (g/day) - DEPENDENT VARIABLE
cat("  - cow_id: Animal identifier - FIXED EFFECT (estimates each animal's emission level)\n")
##   - cow_id: Animal identifier - FIXED EFFECT (estimates each animal's emission level)
cat("  - airflow: Air flow rate - COVARIATE\n")
##   - airflow: Air flow rate - COVARIATE
cat("  - visit_duration: Duration of measurement - COVARIATE\n")
##   - visit_duration: Duration of measurement - COVARIATE
cat("  - (1|date): Random intercept for measurement date\n")
##   - (1|date): Random intercept for measurement date
cat("  - (1|cow_id:hour_of_day): Random intercept for hour of day nested within animal\n\n")
##   - (1|cow_id:hour_of_day): Random intercept for hour of day nested within animal
cat("Model fitted using REML (Restricted Maximum Likelihood) estimation\n")
## Model fitted using REML (Restricted Maximum Likelihood) estimation
cat("Package: lme4 (R)\n")
## Package: lme4 (R)

8.2 7.2 Session Information

cat("R Session Information:\n")
## R Session Information:
sessionInfo()
## R version 4.5.1 (2025-06-13 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 22631)
## 
## Matrix products: default
##   LAPACK version 3.12.1
## 
## locale:
## [1] LC_COLLATE=English_Canada.utf8  LC_CTYPE=English_Canada.utf8   
## [3] LC_MONETARY=English_Canada.utf8 LC_NUMERIC=C                   
## [5] LC_TIME=English_Canada.utf8    
## 
## time zone: America/Edmonton
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] MASS_7.3-65      kableExtra_1.4.0 knitr_1.50       gridExtra_2.3   
##  [5] lmerTest_3.1-3   emmeans_2.0.0    lme4_1.1-37      Matrix_1.7-3    
##  [9] readxl_1.4.5     lubridate_1.9.4  forcats_1.0.1    stringr_1.5.1   
## [13] dplyr_1.1.4      purrr_1.1.0      readr_2.1.5      tidyr_1.3.1     
## [17] tibble_3.3.0     ggplot2_3.5.2    tidyverse_2.0.0 
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.6        xfun_0.52           bslib_0.9.0        
##  [4] lattice_0.22-7      tzdb_0.5.0          numDeriv_2016.8-1.1
##  [7] vctrs_0.6.5         tools_4.5.1         Rdpack_2.6.4       
## [10] generics_0.1.4      pkgconfig_2.0.3     RColorBrewer_1.1-3 
## [13] lifecycle_1.0.4     compiler_4.5.1      farver_2.1.2       
## [16] textshaping_1.0.3   htmltools_0.5.8.1   sass_0.4.10        
## [19] yaml_2.3.10         pillar_1.11.0       nloptr_2.2.1       
## [22] jquerylib_0.1.4     cachem_1.1.0        reformulas_0.4.2   
## [25] boot_1.3-31         nlme_3.1-168        tidyselect_1.2.1   
## [28] digest_0.6.37       mvtnorm_1.3-3       stringi_1.8.7      
## [31] labeling_0.4.3      splines_4.5.1       fastmap_1.2.0      
## [34] grid_4.5.1          cli_3.6.5           magrittr_2.0.3     
## [37] utf8_1.2.6          withr_3.0.2         scales_1.4.0       
## [40] timechange_0.3.0    estimability_1.5.1  rmarkdown_2.29     
## [43] cellranger_1.1.0    hms_1.1.3           evaluate_1.0.5     
## [46] rbibutils_2.3       viridisLite_0.4.2   rlang_1.1.6        
## [49] Rcpp_1.1.0          xtable_1.8-4        glue_1.8.0         
## [52] xml2_1.4.0          svglite_2.2.2       rstudioapi_0.17.1  
## [55] minqa_1.2.8         jsonlite_2.0.0      R6_2.6.1           
## [58] systemfonts_1.3.1