knitr::opts_chunk$set(message = FALSE, warning = FALSE, fig.align = "center")

Part 1: Media Selection

Objectives - To determine which media best supports the growth and identification of Klebsiella pneumoniae, in terms of observable differences, for use during the main experimental phase.

Media being compared - MacConkey Agar with ampicillin (MacA), Cystine Lactose Electrolyte Deficient agar (CLED), Simmons Citrate agar with Inositol and ampicillin (SCA-I + Amp) and without ampicillin (SCA-I).

Genera being compared - Milk and faecal Klebsiella, Citrobacter/Enterobacter and Escherichia. These comparisons allow qualitative determination of the phenotypic separation between phylogenetically close genera.

1. Packages and Data Import

library(readxl)
library(ggplot2)
library(dplyr)
library(tidyverse)
library(patchwork)
library(grid)

Media_Observations_Exploratory_Phase <- read_excel("Media Observations - Exploratory Phase.xlsx", 
    sheet = "Growth stacked")
str(Media_Observations_Exploratory_Phase)

## tibble [164 × 8] (S3: tbl_df/tbl/data.frame)
##  $ Media            : chr [1:164] "CLED" "CLED" "CLED" "CLED" ...
##  $ Genus            : chr [1:164] "Milk Klebsiella" "Milk Klebsiella" "Milk Klebsiella" "Milk Klebsiella" ...
##  $ Growth           : num [1:164] 2 2 2 2 2 2 2 2 2 2 ...
##  $ Precipitation    : num [1:164] 0 0 0 0 0 0 0 0 0 0 ...
##  $ ID               : chr [1:164] "DA22-0002" "DA22-0051" "DA22-0147" "DA22-0149" ...
##  $ Degree of Mucoidy: chr [1:164] "Yes" "Yes" "No" "No" ...
##  $ Colour           : chr [1:164] "Y" "Y" "Y" "Y" ...
##  $ Elevation        : chr [1:164] "Domed" "Domed" "Domed" "Domed" ...

Media_Observations_Exploratory_Phase$Genus <- factor(Media_Observations_Exploratory_Phase$Genus)
Media_Observations_Exploratory_Phase$Media <- factor(Media_Observations_Exploratory_Phase$Media)

2. Data Exploration

Media_Observations_Exploratory_Phase <- Media_Observations_Exploratory_Phase %>%
  mutate(Genus = factor(Genus,
                        levels = c("Milk Klebsiella",
                                   "Faecal Klebsiella",
                                   "Citrobacter/Enterobacter",
                                   "Escherichia")))

# Greyscale palette
grey_palette <- c("#4d4d4d", "#7f7f7f", "#b2b2b2", "#d9d9d9")

# Plot generator functions
plot_growth <- function(media){
  Media_Observations_Exploratory_Phase %>% 
    filter(Media == media) %>%
    group_by(Genus) %>%
    summarise(mean = mean(Growth), sd = sd(Growth), .groups = 'drop') %>%
    ggplot(aes(Genus, mean, fill = Genus)) +
    geom_col() +
    geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd), width = 0.2) +
    scale_fill_manual(values = grey_palette) +
    labs(x = NULL, y = "Growth ± SD") +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 7))

}

plot_elevation <- function(media){
  Media_Observations_Exploratory_Phase %>%
    filter(Media == media) %>%
    ggplot(aes(Elevation, fill = Genus)) +
    geom_bar(position = "dodge") +
    scale_fill_manual(values = grey_palette) +
    scale_y_continuous(breaks = scales::pretty_breaks(n = 6)) +
    labs(x = NULL, y = "No. of Isolates") +
    theme_minimal() +
    theme(axis.text.x = element_text(size = 7))
}

plot_mucoidy <- function(media){
  Media_Observations_Exploratory_Phase %>%
    filter(Media == media) %>%
    ggplot(aes(`Degree of Mucoidy`, fill = Genus)) +
    geom_bar(position = "dodge") +
    scale_fill_manual(values = grey_palette) +
    scale_y_continuous(breaks = scales::pretty_breaks(n = 6)) +
    scale_x_discrete(labels = c(
      "No" = "0",
      "Somewhat" = "1",
      "Yes" = "2"
    )) +
    labs(x = NULL, y = "No. of Isolates") +
    theme_minimal() +
    theme(axis.text.x = element_text(size = 7))
}

# List of media
media_list <- c("CLED", "MacA", "SCAI", "SCAI + Amp")

# Build 4 rows × 3 columns
plot_grid <- map(media_list, ~{
  plot_growth(.x) | plot_elevation(.x) | plot_mucoidy(.x)
})

# Combine rows
combined_plots <- wrap_plots(plot_grid, ncol = 1) + 
  plot_layout(guides = "collect") &          
  theme(legend.position = "right") 

# Add column titles
column_labels <- plot_annotation(
  title = NULL,
  subtitle = NULL,
  tag_levels = NULL,
  theme = theme(
    plot.margin = margin(5, 5, 0, 5),
    plot.title = element_text(size = 12, face = "bold")
  )
) &
  theme()

# Final combined figure with row labels via layout
final_figure <- (combined_plots) +
  plot_layout(
    widths = c(0.15, 1), 
    heights = c(1)
  ) &
  theme(plot.margin = margin(10, 10, 10, 10))

# Draw row labels manually
grid.newpage()
grid.draw(final_figure)
n_rows <- length(media_list)

for (i in seq_along(media_list)) {
  grid.text(
    media_list[i],
    x = unit(0.01, "npc"),   # Move further left (adjust -0.03 to -0.06 if needed)
    y = unit(1 - (i - 0.5) / n_rows, "npc"),  # Center vertically in each row
    rot = 90,
    gp = gpar(fontsize = 12, fontface = "bold")
  )
}

grid.text("Growth ± SD", x = 0.17, y = 0.99, gp = gpar(fontsize = 12, fontface = "bold"))
grid.text("Elevation", x = 0.42, y = 0.99, gp = gpar(fontsize = 12, fontface = "bold"))
grid.text("Degree of Mucoidy", x = 0.68, y = 0.99, gp = gpar(fontsize = 12, fontface = "bold"))

This bar graph illustrates the mean growth scores (± standard deviation (SD)) of milk and faecal Klebsiella, Citrobacter/Enterobacter and Escherichia on various media types, the number of isolates exhibiting different colony elevation shapes by genus and media, and the number of isolates exhibiting degrees of mucoidy by genus and media.
Milk and faecal Klebsiella show consistently high mean growth scores across all media types.
Citrobacter/Enterobacter show an observable decrease in growth on SCA-I and SCA-I + Amp compared to other media.
Escherichia shows high mean growth scores on CLED and MacA, but does not grow on SCA-I or SCA-I + Amp.
All media types would sufficiently support Klebsiella growth.
On CLED and MacA, milk and faecal Klebsiella are consistently different in appearance compared to Citrobacter/Enterobacter and Escherichia.
On CLED only, some milk and faecal Klebsiella isolates are described as mucoid.

Media_Observations_Exploratory_Phase %>%
  group_by(Genus, Media) %>%
  summarise(mean_precip = mean(Precipitation),
            sd_precip = sd(Precipitation), .groups = 'drop') %>%
  ggplot(aes(x = Genus, y = mean_precip, fill = Media)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  geom_errorbar(aes(ymin = mean_precip - sd_precip,
                    ymax = mean_precip + sd_precip),
                width = 0.2, position = position_dodge(0.9)) +
scale_fill_manual(values = c("#4d4d4d", "#7f7f7f", "#b2b2b2", "#d9d9d9")) +
  labs(title = "", y = "Mean Precipitation ± SD") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

This bar graph shows the mean precipitation score (± standard deviation (SD)) by genus and media, where 0 = no precipitate, 1 = precipitate present around minority of colonies, and 2 = precipitate present around majority of colonies.
Escherichia is the only genus to produce a precipitate on any media. This occurs on MacA media only.

3. Conclusions

CLED media does not appear to inhibit Klebsiella growth. However, when highly mucoid, individual colonies may merge together on the plate, making them more difficult to count. For this reason, CLED media will not be used.
The suppressed Escherichia growth on both SCA-I and SCA-I + Amp is beneficial as species in this genus may be naturally present in bedding material. However, the consistent formation of precipitate is a strong, observable characteristic that can be used to distinguish between Escherichia and Klebsiella on MacA.
Colony elevation can be used to further distinguish between Klebsiella and other genera on MacA.
Given these factors and the improved accessibility of MacA compared to SCA-I and SCA-I + Amp, MacA is a the most optimal choice. If available, SCAI will be used as a comparison to MacA during the main experimental phase.

—

Part 2: Bedding Inoculation Model Development

Objective - To determine the most optimal incubation conditions for supporting the growth of K. pneumoniae in sand and wood shavings.

Incubation conditions being compared - High (~90% RH) and low (~20% RH) humidity, container lids that are loose or off.

1. Packages and Data Import

library(lme4)
library(car)
library(ggsignif)
library(emmeans)
library(stringr)

Pilot <- read_excel("Bedding inoculation model development.xlsx", 
    sheet = "Pilot study collated")
str(Pilot)

## tibble [96 × 8] (S3: tbl_df/tbl/data.frame)
##  $ Study           : num [1:96] 2.1 2.1 2.1 2.1 2.1 2.1 2.1 2.1 2.1 2.1 ...
##  $ Plate_ID        : chr [1:96] "1. Sand: off/L/F " "2. Sand: off/L/F " "1. Sand: off/L/M" "2. Sand: off/L/M " ...
##  $ Cap             : chr [1:96] "Off" "Off" "Off" "Off" ...
##  $ Humidity        : chr [1:96] "Low" "Low" "Low" "Low" ...
##  $ Sample          : chr [1:96] "F" "F" "M" "M" ...
##  $ Replicate       : num [1:96] 1 2 1 2 1 2 1 2 1 2 ...
##  $ Klebsiella_count: num [1:96] 0 0 0 0 0 0 0 0 0 0 ...
##  $ Total_count     : num [1:96] 0 0 0 0 0 0 0 0 0 0 ...

2. Data Exploration and Model Development

df <- Pilot %>%
  mutate(
    Bedding = ifelse(str_detect(`Plate_ID`, "Sand"), "Sand", "Wood shavings"),
    Kleb_ratio = ifelse(Total_count > 0, Klebsiella_count / Total_count, NA_real_)
  )

df %>%
  group_by(Bedding, Cap, Humidity, Sample) %>%
  summarise(
    mean_Kleb = mean(Klebsiella_count),
    sd_Kleb = sd(Klebsiella_count),
    mean_ratio = mean(Kleb_ratio, na.rm = TRUE),
    n = n()
  )

## # A tibble: 24 × 8
## # Groups:   Bedding, Cap, Humidity [8]
##    Bedding Cap   Humidity Sample mean_Kleb sd_Kleb mean_ratio     n
##    <chr>   <chr> <chr>    <chr>      <dbl>   <dbl>      <dbl> <int>
##  1 Sand    Loose High     C          12      10.8     0.0518      4
##  2 Sand    Loose High     F          33.5    35.3     0.0853      4
##  3 Sand    Loose High     M          27.2    27.1     0.0600      4
##  4 Sand    Loose Low      C           3       4.76    0.00419     4
##  5 Sand    Loose Low      F           3.75    1.26    0.00721     4
##  6 Sand    Loose Low      M          14      16.7     0.0253      4
##  7 Sand    Off   High     C          21.2    30.0     0.0624      4
##  8 Sand    Off   High     F          36.8    21.0     0.106       4
##  9 Sand    Off   High     M          25.8    31.2     0.0472      4
## 10 Sand    Off   Low      C           0       0     NaN           4
## # ℹ 14 more rows

df_low <- subset(df, Humidity == "Low")

ggplot(df_low, aes(x = Cap, y = Klebsiella_count, fill = Cap)) +
  geom_boxplot(width = 0.6, color = "black") +
  facet_grid(Bedding ~ Sample) +
  scale_fill_grey(start = 0.8, end = 0.4) +      # light → dark grey
  theme_minimal(base_size = 12) +
  labs(
    title = "",
    y = "Klebsiella count (CFU)",
    x = "Cap configuration"
  ) +
  coord_cartesian(ylim = c(0, 80)) +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(size = 0.3, colour = "grey80"),
    strip.text = element_text(face = "bold"),
    legend.position = "none",
    panel.spacing = unit(1.2, "cm")   # ⬅️ increase space between panels here
  )

This box and whisker plot compares Klebsiella counts across cap configurations at low humidity during bedding incubation. Since there is minimal growth in all low humidity groups, this condition will not be selected for the main experimental phase.

df_high <- df %>%
  filter(Humidity == "High")

df_high %>%
  group_by(Bedding, Cap) %>%
  summarise(sum = sum(Klebsiella_count), n = n())

## # A tibble: 4 × 4
## # Groups:   Bedding [2]
##   Bedding       Cap     sum     n
##   <chr>         <chr> <dbl> <int>
## 1 Sand          Loose   291    12
## 2 Sand          Off     335    12
## 3 Wood shavings Loose   394    12
## 4 Wood shavings Off     200    12

model_glmer <- glmer(Klebsiella_count ~ Bedding * Cap + (1 | Study),
                     data = df_high, family = poisson)
summary(model_glmer)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: Klebsiella_count ~ Bedding * Cap + (1 | Study)
##    Data: df_high
## 
##      AIC      BIC   logLik deviance df.resid 
##   1405.6   1414.9   -697.8   1395.6       43 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.3283 -3.4098 -2.0764  0.9063 27.4427 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  Study  (Intercept) 0.05295  0.2301  
## Number of obs: 48, groups:  Study, 2
## 
## Fixed effects:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                  3.16219    0.17307  18.271  < 2e-16 ***
## BeddingWood shavings         0.30303    0.07727   3.922 8.78e-05 ***
## CapOff                       0.14081    0.08010   1.758   0.0788 .  
## BeddingWood shavings:CapOff -0.81884    0.11811  -6.933 4.11e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) BddnWs CapOff
## BddngWdshvn -0.257              
## CapOff      -0.248  0.555       
## BddngWsh:CO  0.168 -0.654 -0.678

Anova(model_glmer, type = "III")

## Analysis of Deviance Table (Type III Wald chisquare tests)
## 
## Response: Klebsiella_count
##               Chisq Df Pr(>Chisq)    
## (Intercept) 333.830  1  < 2.2e-16 ***
## Bedding      15.382  1  8.784e-05 ***
## Cap           3.090  1    0.07877 .  
## Bedding:Cap  48.069  1  4.115e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

emm_caps<- emmeans(model_glmer, ~ Cap | Bedding, type = "response")
emm_caps

## Bedding = Sand:
##  Cap   rate   SE  df asymp.LCL asymp.UCL
##  Loose 23.6 4.09 Inf      16.8      33.2
##  Off   27.2 4.67 Inf      19.4      38.1
## 
## Bedding = Wood shavings:
##  Cap   rate   SE  df asymp.LCL asymp.UCL
##  Loose 32.0 5.45 Inf      22.9      44.7
##  Off   16.2 2.88 Inf      11.5      23.0
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the log scale

pairs(emm_caps)

## Bedding = Sand:
##  contrast    ratio     SE  df null z.ratio p.value
##  Loose / Off 0.869 0.0696 Inf    1  -1.758  0.0788
## 
## Bedding = Wood shavings:
##  contrast    ratio     SE  df null z.ratio p.value
##  Loose / Off 1.970 0.1710 Inf    1   7.812  <.0001
## 
## Tests are performed on the log scale

According to the Poisson mixed-effects model (GLMM), there is a significant interaction between bedding type and cap configuration during incubation under high humidity (χ²(1) = 48.069, p < 0.001). In wood shavings, there is significantly higher Klebsiella growth when caps are left loose. In sand, there is no significant difference between caps that are left loose compared to caps that are taken off during incubation.

df_high <- subset(df, Humidity == "High")

ggplot(df_high, aes(x = Cap, y = Klebsiella_count, fill = Cap)) +
  geom_boxplot(width = 0.6, color = "black") +  
  facet_grid(Bedding ~ Sample) +
  scale_fill_grey(start = 0.8, end = 0.5) +    
  theme_minimal(base_size = 12) +
  labs(
    title = "",
    y = "Klebsiella count (CFU)",
    x = "Cap configuration"
  ) +
  coord_cartesian(ylim = c(0, 80)) +
  geom_signif(
    data = subset(df_high, Bedding == "Wood shavings"),
    comparisons = list(c("Off", "Loose")),
    annotations = "<0.0001",
    y_position = 65,
    tip_length = 0.02,
    textsize = 4
  ) +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(size = 0.3, colour = "grey80"),
    strip.text = element_text(face = "bold"),
    legend.position = "none",
    panel.spacing.y = unit(1.2, "cm")   # ⬅️ Adds more vertical space between 'Sand' and 'Wood shavings'
  )

This box and whisker plot compares Klebsiella counts across cap configurations at high humidity during bedding incubation.

3. Conclusions

Since there is significantly higher growth in wood shavings when caps are loose under high humidity, these condition will be selected for the main experimental phase.

—

Part 3: Experimental Data Analysis

1. Overview

Objective - To determine whether bedding treatments applied to wood shavings and sand reduce the growth of Klebsiella pneumoniae.

Primary model - Linear mixed model on log transformed K. pneumoniae CFU, with counts capped at 2500.

Hypothesis tests - Within-bedding pairwise comparisons among treatments (no cross-bedding comparisons).

Sensitivity analysis - The above repeated with log-transformed K. pneumoniae CFU with higher cap values and a GLMM.

E. coli model - Linear mixed model on log transformed E. coli CFU, with counts capped at 2500, as a validation for bedding inoculation and treatment procedures.

2. Packages and Data Import

library(tidyverse)
library(janitor)
library(lmerTest)     
library(multcomp)
library(multcompView)  
library(patchwork)
library(performance)   
library(insight)      
library(ggtext)        
library(ggpubr)        
library(car)
library(scales)
library(summarytools)
library(broom.mixed)
library(DHARMa)


raw <- read_excel("DATA Experiment recording.xlsx", 
    sheet = "Collated")

glimpse(raw)

## Rows: 162
## Columns: 11
## $ Experiment_rep   <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ Date_Plated      <dttm> 2025-07-08, 2025-07-08, 2025-07-08, 2025-07-08, 2025…
## $ Sample_number    <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16…
## $ Experiment_set   <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ Bedding          <chr> "Wood shavings", "Wood shavings", "Wood shavings", "W…
## $ Treatment        <chr> "Conditioner", "Conditioner", "Conditioner", "Disinfe…
## $ Replicate        <dbl> 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3,…
## $ Klebsiella_count <chr> "1176", "1016", "792", "TNTC", "1600", "69", "1144", …
## $ Total_CFU_count  <chr> "1185", "1016", "798", "TNTC", "1607", "69", "1156", …
## $ Kleb_capped      <dbl> 1176, 1016, 792, 2500, 1600, 69, 1144, 1336, 1680, 12…
## $ CFU_capped       <dbl> 1185, 1016, 798, 2500, 1607, 69, 1156, 1343, 1686, 12…

dat <- raw %>%
  mutate(
    Bedding      = str_squish(as.character(Bedding)),
    Conditioner  = str_squish(as.character(Treatment)),
    Bedding      = factor(Bedding, levels = c("Wood shavings", "Sand")),
    Treatment  = factor(Treatment, levels = c("Control", "Conditioner", "Disinfectant")),
    Experiment_rep   = as.factor(Experiment_rep),
    Experiment_set  = as.factor(Experiment_set),
    Replicate        = as.factor(`Replicate`)
  )
glimpse(dat)

## Rows: 162
## Columns: 12
## $ Experiment_rep   <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ Date_Plated      <dttm> 2025-07-08, 2025-07-08, 2025-07-08, 2025-07-08, 2025…
## $ Sample_number    <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16…
## $ Experiment_set   <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ Bedding          <fct> Wood shavings, Wood shavings, Wood shavings, Wood sha…
## $ Treatment        <fct> Conditioner, Conditioner, Conditioner, Disinfectant, …
## $ Replicate        <fct> 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3,…
## $ Klebsiella_count <chr> "1176", "1016", "792", "TNTC", "1600", "69", "1144", …
## $ Total_CFU_count  <chr> "1185", "1016", "798", "TNTC", "1607", "69", "1156", …
## $ Kleb_capped      <dbl> 1176, 1016, 792, 2500, 1600, 69, 1144, 1336, 1680, 12…
## $ CFU_capped       <dbl> 1185, 1016, 798, 2500, 1607, 69, 1156, 1343, 1686, 12…
## $ Conditioner      <chr> "Conditioner", "Conditioner", "Conditioner", "Disinfe…

3. Data Exploration

summary(dat$Kleb_capped)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0    38.0   303.0   805.6  1524.0  2500.0

The median (303.0) is much lower than the mean (805.6) for capped Klebsiella values suggesting a right-skewed distribution.

print(summarytools::dfSummary(dat,valid.col=FALSE, graph.magnif=0.8, style="multiline"),method = "render")

Data Frame Summary

dat

Dimensions: 162 x 12
Duplicates: 0

Variable

Stats / Values

Freqs (% of Valid)

Graph

Missing

Experiment_rep [factor]

1. 1

2. 2

3. 3

4. 4

5. 5

6. 6

36	(	22.2%	)
36	(	22.2%	)
36	(	22.2%	)
18	(	11.1%	)
18	(	11.1%	)
18	(	11.1%	)

0 (0.0%)

Date_Plated [POSIXct, POSIXt]

1. 2025-07-08

2. 2025-07-09

3. 2025-07-16

4. 2025-09-09

36	(	22.2%	)
36	(	22.2%	)
36	(	22.2%	)
54	(	33.3%	)

0 (0.0%)

Sample_number [numeric]

Mean (sd) : 21.5 (13.1)

min ≤ med ≤ max:

1 ≤ 21 ≤ 54

IQR (CV) : 20 (0.6)

54 distinct values

0 (0.0%)

Experiment_set [factor]

1. 1

2. 2

108	(	66.7%	)
54	(	33.3%	)

0 (0.0%)

Bedding [factor]

1. Wood shavings

2. Sand

81	(	50.0%	)
81	(	50.0%	)

0 (0.0%)

Treatment [factor]

1. Control

2. Conditioner

3. Disinfectant

54	(	33.3%	)
54	(	33.3%	)
54	(	33.3%	)

0 (0.0%)

Replicate [factor]

1. 1

2. 2

3. 3

54	(	33.3%	)
54	(	33.3%	)
54	(	33.3%	)

0 (0.0%)

Klebsiella_count [character]

1. TNTC
2. 0
3. 16
4. 240
5. 51
6. 1208
7. 2104
8. 2192
9. 29
10. 42
[ 96 others ]

22	(	13.6%	)
20	(	12.3%	)
3	(	1.9%	)
3	(	1.9%	)
3	(	1.9%	)
2	(	1.2%	)
2	(	1.2%	)
2	(	1.2%	)
2	(	1.2%	)
2	(	1.2%	)
101	(	62.3%	)

0 (0.0%)

Total_CFU_count [character]

1. TNTC
2. 0
3. 2
4. 117
5. 121
6. 123
7. 132
8. 142
9. 2104
10. 39
[ 110 others ]

22	(	13.6%	)
9	(	5.6%	)
4	(	2.5%	)
2	(	1.2%	)
2	(	1.2%	)
2	(	1.2%	)
2	(	1.2%	)
2	(	1.2%	)
2	(	1.2%	)
2	(	1.2%	)
113	(	69.8%	)

0 (0.0%)

Kleb_capped [numeric]

Mean (sd) : 805.6 (932.5)

min ≤ med ≤ max:

0 ≤ 303 ≤ 2500

IQR (CV) : 1486 (1.2)

106 distinct values

0 (0.0%)

CFU_capped [numeric]

Mean (sd) : 826.1 (921.3)

min ≤ med ≤ max:

0 ≤ 368 ≤ 2500

IQR (CV) : 1476.5 (1.1)

120 distinct values

0 (0.0%)

Conditioner [character]

1. Conditioner

2. Control

3. Disinfectant

54	(	33.3%	)
54	(	33.3%	)
54	(	33.3%	)

0 (0.0%)

Generated by summarytools 1.1.4 (R version 4.3.0)
2026-01-28

dat %>%
  group_by(Bedding, Treatment) %>%
  summarise(
    mean_CFU = mean(CFU_capped, na.rm = TRUE),
    sd_CFU = sd(CFU_capped, na.rm = TRUE),
    mean_Kleb = mean(Kleb_capped, na.rm = TRUE),
    sd_Kleb = sd(Kleb_capped, na.rm = TRUE),
    n = n()
  )

## # A tibble: 6 × 7
## # Groups:   Bedding [2]
##   Bedding       Treatment    mean_CFU sd_CFU mean_Kleb sd_Kleb     n
##   <fct>         <fct>           <dbl>  <dbl>     <dbl>   <dbl> <int>
## 1 Wood shavings Control         769.    951.     754.     957.    27
## 2 Wood shavings Conditioner     508.    779.     499.     782.    27
## 3 Wood shavings Disinfectant   1786.    929.    1784.     933.    27
## 4 Sand          Control        1194.    889.    1168.     914.    27
## 5 Sand          Conditioner     615.    470.     551.     512.    27
## 6 Sand          Disinfectant     84.0   216.      77.9    203.    27

n = 27 for all groups so comparisons are balanced.

4. Linear Mixed Model (LMM) (non-transformed)

4.1 Comparing inclusion of random effect terms

# Model with Experiment_set as a random effect
m1 <- lmer(Kleb_capped ~ Bedding * Treatment + (1 | Experiment_rep) + (1 | Experiment_set), data = dat, REML = TRUE)

# Model without Experiment_set as a random effect
m0 <- lmer(Kleb_capped ~ Bedding * Treatment + (1 | Experiment_rep), data = dat, REML = TRUE)

# Compare variance components
VarCorr(m1)

##  Groups         Name        Std.Dev.
##  Experiment_rep (Intercept) 439.31  
##  Experiment_set (Intercept)   0.00  
##  Residual                   615.48

# Likelihood ratio test 
anova(update(m0, REML = FALSE), update(m1, REML = FALSE))

## Data: dat
## Models:
## update(m0, REML = FALSE): Kleb_capped ~ Bedding * Treatment + (1 | Experiment_rep)
## update(m1, REML = FALSE): Kleb_capped ~ Bedding * Treatment + (1 | Experiment_rep) + (1 | Experiment_set)
##                          npar    AIC  BIC  logLik deviance Chisq Df Pr(>Chisq)
## update(m0, REML = FALSE)    8 2566.3 2591 -1275.1   2550.3                    
## update(m1, REML = FALSE)    9 2568.3 2596 -1275.1   2550.3     0  1          1

Experiment set explains no variance. Use the simpler model.

4.2 Model assumptions

# Normality
shapiro.test(dat$Kleb_capped)

## 
##  Shapiro-Wilk normality test
## 
## data:  dat$Kleb_capped
## W = 0.78194, p-value = 2.938e-14

# Equal variance 
leveneTest(Kleb_capped ~ Bedding * Treatment, data = dat)

## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   5  4.6072 0.0005998 ***
##       156                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Not normal (p < 0.05)
Not equal variances (p < 0.05).
See log transformation comparison below (section 5). QQ plots indicate that normality is improved with transformation so transformed data will be used in the model.

4.3 Model diagnostics

performance::check_model(m0)

No severe collinearity, residuals showing some deviations at the tails, good fit for normality of random effects.
Slight non-linearity and heteroscedasticity
Test log transformation

5. LMM (Log transformed)

m_log <- lmer(log(Kleb_capped + 1) ~ Bedding * Treatment + (1 | Experiment_rep), data = dat, REML = TRUE)

5.1 Model assumptions

residual <- residuals(m_log)

shapiro.test(residual)

## 
##  Shapiro-Wilk normality test
## 
## data:  residual
## W = 0.98991, p-value = 0.303

residuals approximately normal (p > 0.05)

dat$Group <- interaction(dat$Bedding, dat$Treatment)

leveneTest(log(Kleb_capped + 1) ~ Group, data = dat)

## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   5  1.5927 0.1652
##       156

equal variances across groups (p > 0.05)

5.2 Model diagnostics

performance::check_model(m_log)

Normality of residuals, homogeneity of variance and linearity are improved for the transformed model. Proceeding with analysis for log transformed model.

5.3 Intraclass correlation coefficient

performance::icc(m_log)

## # Intraclass Correlation Coefficient
## 
##     Adjusted ICC: 0.415
##   Unadjusted ICC: 0.221

41.5% of variation in Klebsiella CFU is due to differences between experimental reps, indicating moderate level of agreement in terms of how consistently the model distinguishes between groups.

5.4 Examine strains (Milk vs Faecal)

dat$Experiment_set <- factor(
  dat$Experiment_set,
  levels = c(1, 2),
  labels = c("Milk", "Faecal")
)

m_log_set <- lmer(log(Kleb_capped + 1) ~ Bedding * Treatment + Experiment_set + (1 | Experiment_rep),
  data = dat,
  REML = TRUE
)

emm_kleb_strain <- emmeans(
  m_log_set,
  ~ Treatment | Bedding * Experiment_set,
  type = "response"
)

kleb_table_strain <- summary(emm_kleb_strain) %>%
  as.data.frame() %>%                
  tibble::as_tibble() %>%          
  dplyr::mutate(
    est = round(response, 1),
    lwr = round(lower.CL, 1),
    upr = round(upper.CL, 1),
    CI  = paste0(est, " (", lwr, " \u2013 ", upr, ")")
  ) %>%
  dplyr::select(Experiment_set, Bedding, Treatment, CI) %>%
  tidyr::pivot_wider(
    names_from = Treatment,
    values_from = CI
  )
kleb_table_strain

## # A tibble: 4 × 5
##   Experiment_set Bedding       Control                Conditioner   Disinfectant
##   <fct>          <fct>         <chr>                  <chr>         <chr>       
## 1 Milk           Wood shavings 220.9 (58.3 – 829.3)   169.7 (44.6 … 1297.2 (346…
## 2 Milk           Sand          491.1 (130.6 – 1840.1) 162.8 (42.8 … 3.6 (0.2 – …
## 3 Faecal         Wood shavings 193.6 (49.4 – 749.9)   148.7 (37.8 … 1137 (293.9…
## 4 Faecal         Sand          430.4 (110.8 – 1664.1) 142.6 (36.2 … 3.1 (0.1 – …

5.5 Estimated marginal means (EMMs)

emm <- emmeans(m_log, ~ Treatment | Bedding, type = "response")
emm_df <- as.data.frame(emm)
emm_df

## Bedding = Wood shavings:
##  Treatment     response       SE   df lower.CL upper.CL
##  Control       213.6299 121.3362 7.75  56.8500  795.300
##  Conditioner   164.0904  93.3302 7.75  43.4975  611.503
##  Disinfectant 1254.4019 709.7133 7.75 337.3735 4656.676
## 
## Bedding = Sand:
##  Treatment     response       SE   df lower.CL upper.CL
##  Control       474.9223 269.0520 7.75 127.2772 1764.723
##  Conditioner   157.3656  89.5285 7.75  41.6849  586.553
##  Disinfectant    3.4955   2.5414 7.75   0.2117   15.679
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## Intervals are back-transformed from the log(mu + 1) scale

These results describe the model-adjusted predictions for each conditioner within each bedding type.
Disinfectant resulted in the highest predicted Klebsiella growth in wood shavings, while it produced the lowest growth in sand.

5.6 Pairwise comparisons

pairs(emm, adjust = "tukey")

## Bedding = Wood shavings:
##  contrast                     ratio      SE  df null t.ratio p.value
##  Control / Conditioner        1.300  0.5062 151    1   0.674  0.7790
##  Control / Disinfectant       0.171  0.0666 151    1  -4.536  <.0001
##  Conditioner / Disinfectant   0.132  0.0512 151    1  -5.210  <.0001
## 
## Bedding = Sand:
##  contrast                     ratio      SE  df null t.ratio p.value
##  Control / Conditioner        3.005  1.1702 151    1   2.826  0.0147
##  Control / Disinfectant     105.866 41.2216 151    1  11.973  <.0001
##  Conditioner / Disinfectant  35.227 13.7167 151    1   9.148  <.0001
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 3 estimates 
## Tests are performed on the log scale

In wood shavings, disinfectant supported significantly higher Klebsiella counts than both Control and conditioner Predicted control and conditioner counts did not differ significantly.
In sand, all three groups differed significantly from one another, with disinfectant showing the lowest predicted counts.

5.7 Model fixed effects

fixed <- tidy(m_log, effects = "fixed", conf.int = TRUE)
fixed

## # A tibble: 6 × 9
##   effect term    estimate std.error statistic     df  p.value conf.low conf.high
##   <chr>  <chr>      <dbl>     <dbl>     <dbl>  <dbl>    <dbl>    <dbl>     <dbl>
## 1 fixed  (Inter…    5.37      0.565     9.50    8.02 1.22e- 5   4.07       6.67 
## 2 fixed  Beddin…    0.796     0.389     2.05  151.   4.26e- 2   0.0270     1.57 
## 3 fixed  Treatm…   -0.262     0.389    -0.674 151.   5.01e- 1  -1.03       0.507
## 4 fixed  Treatm…    1.77      0.389     4.54  151.   1.16e- 5   0.997      2.54 
## 5 fixed  Beddin…   -0.838     0.551    -1.52  151.   1.30e- 1  -1.93       0.250
## 6 fixed  Beddin…   -6.43      0.551   -11.7   151.   7.48e-23  -7.52      -5.34

Using the wood shavings control group as the reference, predicted means (CFU) were:

214.6 (Shavings:Control)
165.1 (Shavings:Conditioner)
1251.9 (Shavings:Disinfectant)
96.8 (Sand:Control)
32.2 (Sand:Conditioner)
0.91 (Sand:Disinfectant)

y_max_all <- max(emm_df$upper.CL, na.rm = TRUE)
ylim      <- c(0, y_max_all * 1.35)
y_breaks  <- pretty(ylim, n = 6)


treatment_labels <- c(
Control    = "Control",
ZorbiFresh = "Conditioner",
Doxall     = "Disinfectant"
)

# Wood Shavings

emm_shavings <- subset(emm_df, Bedding == "Wood shavings")
p1 <- ggplot(emm_shavings, aes(x = Treatment, y = response, fill = Treatment)) +
geom_col(width = 0.6) +
geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2) +
geom_signif(xmin = 1, xmax = 3, annotations = "p < .0001",
y_position = y_max_all * 1.05, tip_length = 0.01, textsize = 3.5) +
geom_signif(xmin = 2, xmax = 3, annotations = "p < .0001",
y_position = y_max_all * 1.15, tip_length = 0.01, textsize = 3.5) +
labs(
title = "Wood Shavings",
x = "Treatment",
y = expression("Predicted " * italic("K. pneumoniae") * " CFU")
) +
scale_fill_grey(start = 0.4, end = 0.8) +
scale_x_discrete(labels = treatment_labels) +
theme_bw(base_size = 12) +
theme(legend.position = "none") +
scale_y_continuous(limits = ylim, breaks = y_breaks)

# Sand

emm_sand <- subset(emm_df, Bedding == "Sand")
p2 <- ggplot(emm_sand, aes(x = Treatment, y = response, fill = Treatment)) +
geom_col(width = 0.6) +
geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2) +
geom_signif(xmin = 1, xmax = 2, annotations = "p = 0.0147",
y_position = y_max_all * 1.05, tip_length = 0.01, textsize = 3.5) +
geom_signif(xmin = 1, xmax = 3, annotations = "p < .0001",
y_position = y_max_all * 1.15, tip_length = 0.01, textsize = 3.5) +
geom_signif(xmin = 2, xmax = 3, annotations = "p < .0001",
y_position = y_max_all * 1.25, tip_length = 0.01, textsize = 3.5) +
labs(title = "Sand", x = "Treatment", y = NULL) +
scale_fill_grey(start = 0.4, end = 0.8) +
scale_x_discrete(labels = treatment_labels) +
theme_bw(base_size = 12) +
theme(
legend.position = "none",
axis.text.y = element_blank(),
axis.ticks.y = element_blank()
) +
scale_y_continuous(limits = ylim, breaks = y_breaks)

p1 + p2

y_max_all <- max(emm_df$upper.CL, na.rm = TRUE)
ylim      <- c(0, y_max_all * 1.35)
y_breaks  <- pretty(ylim, n = 6)

treatment_labels <- c(
  Control    = "Control",
  ZorbiFresh = "Conditioner",
  Doxall     = "Disinfectant"
)

base_theme <- theme_bw(base_size = 11) +
  theme(
    legend.position = "none",
    plot.margin = margin(5, 5, 5, 5)
  )

# Wood shavings
emm_shavings <- subset(emm_df, Bedding == "Wood shavings")

p1 <- ggplot(emm_shavings, aes(x = Treatment, y = response)) +
  geom_point(size = 2.8) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.15) +
  geom_signif(xmin = 1, xmax = 3, annotations = "p < .0001",
              y_position = y_max_all * 1.05, tip_length = 0.01, textsize = 3.2) +
  geom_signif(xmin = 2, xmax = 3, annotations = "p < .0001",
              y_position = y_max_all * 1.15, tip_length = 0.01, textsize = 3.2) +
  labs(
    title = "Wood shavings",
    x = "Treatment",
    y = expression("Model-predicted " * italic("K. pneumoniae") * " colony count")
  ) +
  scale_x_discrete(labels = treatment_labels) +
  scale_y_continuous(limits = ylim, breaks = y_breaks) +
  base_theme

# Sand
emm_sand <- subset(emm_df, Bedding == "Sand")

p2 <- ggplot(emm_sand, aes(x = Treatment, y = response)) +
  geom_point(size = 2.8) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.15) +
  geom_signif(xmin = 1, xmax = 2, annotations = "p = 0.0147",
              y_position = y_max_all * 1.05, tip_length = 0.01, textsize = 3.2) +
  geom_signif(xmin = 1, xmax = 3, annotations = "p < .0001",
              y_position = y_max_all * 1.15, tip_length = 0.01, textsize = 3.2) +
  geom_signif(xmin = 2, xmax = 3, annotations = "p < .0001",
              y_position = y_max_all * 1.25, tip_length = 0.01, textsize = 3.2) +
  labs(
    title = "Sand",
    x = "Treatment",
    y = NULL
  ) +
  scale_x_discrete(labels = treatment_labels) +
  scale_y_continuous(limits = ylim, breaks = y_breaks) +
  base_theme +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )

p1 + p2

This box and whisker plot compares Predicted Klebsiella CFU across bedding treatment groups, showing significant differences with brackets.
There is a significant difference in Klebsiella CFU growth in opposite directions between disinfectant and conditioner groups in wood shavings, where disinfectant increases counts compared to conditioner In shavings, there is no significant difference between conditioner and the disinfectant
There is a significant difference in Klebsiella CFU growth in sand with conditioner compared to the control, and in sand with disinfectant compared to the control. However, counts are significantly lower when applying disinfectant compared to conditioner

6. Sensitivity analysis

6.1 Higher cap value for Klebsiella counts (5000)

# Cap TNTC at 5000
data <- dat %>%
  mutate(
    Kleb_capped_5000 = ifelse(Klebsiella_count == "TNTC", 5000, as.numeric(Klebsiella_count))
  )

# Fit model 
m_5000 <- lmer(log(Kleb_capped_5000 + 1) ~ Bedding * Treatment + (1 | Experiment_rep), data = data)

# ICC + diagnostics
icc(m_5000)

## # Intraclass Correlation Coefficient
## 
##     Adjusted ICC: 0.425
##   Unadjusted ICC: 0.224

performance::check_model(m_5000)

# Pairwise comparisons
emm_5000 <- emmeans(m_log, ~ Treatment | Bedding, type = "response")
pairs(emm_5000, adjust = "tukey")

## Bedding = Wood shavings:
##  contrast                     ratio      SE  df null t.ratio p.value
##  Control / Conditioner        1.300  0.5062 151    1   0.674  0.7790
##  Control / Disinfectant       0.171  0.0666 151    1  -4.536  <.0001
##  Conditioner / Disinfectant   0.132  0.0512 151    1  -5.210  <.0001
## 
## Bedding = Sand:
##  contrast                     ratio      SE  df null t.ratio p.value
##  Control / Conditioner        3.005  1.1702 151    1   2.826  0.0147
##  Control / Disinfectant     105.866 41.2216 151    1  11.973  <.0001
##  Conditioner / Disinfectant  35.227 13.7167 151    1   9.148  <.0001
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 3 estimates 
## Tests are performed on the log scale

6.2 Higher cap value for Klebsiella counts (10000)

# Cap TNTC at 10000
data <- dat %>%
  mutate(
    Kleb_capped_10000 = ifelse(Klebsiella_count == "TNTC", 10000, as.numeric(Klebsiella_count))
  )

# Fit model 
m_10000 <- lmer(log(Kleb_capped_10000 + 1) ~ Bedding * Treatment + (1 | Experiment_rep), data = data)

# ICC + diagnostics
icc(m_10000)

## # Intraclass Correlation Coefficient
## 
##     Adjusted ICC: 0.428
##   Unadjusted ICC: 0.225

performance::check_model(m_10000)

# Pairwise comparisons
emm_10000 <- emmeans(m_log, ~ Treatment | Bedding, type = "response")
pairs(emm_10000, adjust = "tukey")

## Bedding = Wood shavings:
##  contrast                     ratio      SE  df null t.ratio p.value
##  Control / Conditioner        1.300  0.5062 151    1   0.674  0.7790
##  Control / Disinfectant       0.171  0.0666 151    1  -4.536  <.0001
##  Conditioner / Disinfectant   0.132  0.0512 151    1  -5.210  <.0001
## 
## Bedding = Sand:
##  contrast                     ratio      SE  df null t.ratio p.value
##  Control / Conditioner        3.005  1.1702 151    1   2.826  0.0147
##  Control / Disinfectant     105.866 41.2216 151    1  11.973  <.0001
##  Conditioner / Disinfectant  35.227 13.7167 151    1   9.148  <.0001
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 3 estimates 
## Tests are performed on the log scale

The model does not appear to be sensitive to increasing cap values. Model fit, adjusted ICCs and significant comparisons appear similar for all three models (caps = 2500, 5000, 10000).

6.3 Testing GLMM

dat_nb <- dat %>%
  filter(!is.na(Kleb_capped), Kleb_capped >= 0) %>%
  mutate(
    Kleb_capped = as.integer(round(Kleb_capped, 0)),  # ensure counts are integers
    Bedding = factor(Bedding),
    Treatment = factor(Treatment),
    Experiment_rep = factor(Experiment_rep)
  )

m_glmer_nb <- glmer.nb(
  Kleb_capped ~ Bedding * Treatment + (1 | Experiment_rep),
  data = dat_nb
)

summary(m_glmer_nb)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: Negative Binomial(0.6115)  ( log )
## Formula: Kleb_capped ~ Bedding * Treatment + (1 | Experiment_rep)
##    Data: dat_nb
## 
##      AIC      BIC   logLik deviance df.resid 
##   2236.0   2260.7  -1110.0   2220.0      154 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -0.7798 -0.6633 -0.3893  0.5598  6.0798 
## 
## Random effects:
##  Groups         Name        Variance Std.Dev.
##  Experiment_rep (Intercept) 1.192    1.092   
## Number of obs: 162, groups:  Experiment_rep, 6
## 
## Fixed effects:
##                                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                         5.8843     0.5213  11.287  < 2e-16 ***
## BeddingSand                         0.8088     0.3833   2.110   0.0348 *  
## TreatmentConditioner               -0.1228     0.3530  -0.348   0.7280    
## TreatmentDisinfectant               1.6162     0.3675   4.398 1.09e-05 ***
## BeddingSand:TreatmentConditioner   -0.6791     0.4986  -1.362   0.1732    
## BeddingSand:TreatmentDisinfectant  -4.9859     0.5669  -8.796  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) BddngS TrtmnC TrtmnD BdS:TC
## BeddingSand -0.392                            
## TrtmntCndtn -0.335  0.450                     
## TrtmntDsnfc -0.374  0.532  0.499              
## BddngSnd:TC  0.243 -0.644 -0.712 -0.364       
## BddngSnd:TD  0.300 -0.734 -0.341 -0.734  0.468

Anova(m_glmer_nb, type = "III")

## Analysis of Deviance Table (Type III Wald chisquare tests)
## 
## Response: Kleb_capped
##                      Chisq Df Pr(>Chisq)    
## (Intercept)       127.4032  1  < 2.2e-16 ***
## Bedding             4.4534  1    0.03483 *  
## Treatment          27.9499  2  8.526e-07 ***
## Bedding:Treatment  87.0687  2  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

sim <- simulateResiduals(m_glmer_nb)
plot(sim)

testDispersion(sim)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 0.029037, p-value = 0.088
## alternative hypothesis: two.sided

testZeroInflation(sim)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 3.4941, p-value < 2.2e-16
## alternative hypothesis: two.sided

KS test: mild deviation of residual distribution from uniformity (p = 0.04794).
Dispersion test: adequately accounts for overdispersion (p = 0.088).
Outlier test: no individual observations are excessively influencing model fit (p = 0.9).

emm_nb <- emmeans(m_glmer_nb, ~ Treatment | Bedding, type = "response")
summary(emm_nb)

## Bedding = Wood shavings:
##  Treatment    response    SE  df asymp.LCL asymp.UCL
##  Control         359.4 187.3 Inf    129.35     998.3
##  Conditioner     317.8 166.1 Inf    114.14     885.0
##  Disinfectant   1809.0 928.4 Inf    661.61    4946.3
## 
## Bedding = Sand:
##  Treatment    response    SE  df asymp.LCL asymp.UCL
##  Control         806.8 413.1 Inf    295.77    2201.1
##  Conditioner     361.9 185.6 Inf    132.39     989.1
##  Disinfectant     27.8  14.6 Inf      9.88      77.9
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the log scale

pairs(emm_nb, adjust = "tukey")

## Bedding = Wood shavings:
##  contrast                    ratio      SE  df null z.ratio p.value
##  Control / Conditioner       1.131  0.3992 Inf    1   0.348  0.9355
##  Control / Disinfectant      0.199  0.0730 Inf    1  -4.398  <.0001
##  Conditioner / Disinfectant  0.176  0.0634 Inf    1  -4.819  <.0001
## 
## Bedding = Sand:
##  contrast                    ratio      SE  df null z.ratio p.value
##  Control / Conditioner       2.230  0.7803 Inf    1   2.291  0.0570
##  Control / Disinfectant     29.069 11.2795 Inf    1   8.684  <.0001
##  Conditioner / Disinfectant 13.037  5.0308 Inf    1   6.654  <.0001
## 
## P value adjustment: tukey method for comparing a family of 3 estimates 
## Tests are performed on the log scale

Results of the negative binomial GLMM are largely consistent with the log-transformed LMM. Therefore, observed treatment effects are not strongly dependent on model specification. However, the comparison between conditioner and the control in sand bedding is only marginally non-significant in the GLMM (p = 0.0570) but is significant in the LMM (p = 0.0147). This suggests that the evidence for any effect of conditioner is weakened when accounting for overdispersion and count-scale variablity.

7. E. coli model

7.1 Data import and exploration

raw2 <- read_excel("DATA Experiment recording.xlsx", 
    sheet = "E.coli")

glimpse(raw2)

## Rows: 54
## Columns: 11
## $ `Sample number`     <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,…
## $ Experiment_rep      <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ Bedding             <chr> "Wood shavings", "Wood shavings", "Wood shavings",…
## $ Treatment           <chr> "Conditioner", "Conditioner", "Conditioner", "Disi…
## $ Replicate           <dbl> 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2,…
## $ E.coli_count_MacA   <dbl> 47, 53, 42, 190, 168, 142, 32, 24, 11, 21, 27, 16,…
## $ E.coli_count_Mac    <dbl> 215, 326, 310, 354, 2500, 2500, 2072, 952, 2144, 2…
## $ `E.coli count SCAI` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `Total CFU MacA`    <dbl> 145, 174, 182, 562, 542, 538, 244, 188, 286, 1272,…
## $ `Total CFU SCAI`    <dbl> 0, 0, 0, 0, 112, 0, 0, 0, 0, 147, 264, 158, 0, 0, …
## $ `Total CFU Mac`     <dbl> 215, 326, 310, 354, 2500, 2500, 2072, 961, 2144, 2…

dat_e <- raw2 %>%
  mutate(
    Bedding      = str_squish(as.character(Bedding)),
    Treatment  = str_squish(as.character(Treatment)),
    Bedding      = factor(Bedding, levels = c("Wood shavings", "Sand")),
    Treatment  = factor(Treatment, levels = c("Control", "Conditioner", "Disinfectant")),
    Experiment_rep   = as.factor(Experiment_rep),
    Replicate        = as.factor(`Replicate`)
  )
glimpse(dat_e)

## Rows: 54
## Columns: 11
## $ `Sample number`     <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,…
## $ Experiment_rep      <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ Bedding             <fct> Wood shavings, Wood shavings, Wood shavings, Wood …
## $ Treatment           <fct> Conditioner, Conditioner, Conditioner, Disinfectan…
## $ Replicate           <fct> 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2,…
## $ E.coli_count_MacA   <dbl> 47, 53, 42, 190, 168, 142, 32, 24, 11, 21, 27, 16,…
## $ E.coli_count_Mac    <dbl> 215, 326, 310, 354, 2500, 2500, 2072, 952, 2144, 2…
## $ `E.coli count SCAI` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `Total CFU MacA`    <dbl> 145, 174, 182, 562, 542, 538, 244, 188, 286, 1272,…
## $ `Total CFU SCAI`    <dbl> 0, 0, 0, 0, 112, 0, 0, 0, 0, 147, 264, 158, 0, 0, …
## $ `Total CFU Mac`     <dbl> 215, 326, 310, 354, 2500, 2500, 2072, 961, 2144, 2…

summary(dat_e$E.coli_count_Mac)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    5.75 1072.00 1157.43 2164.00 2500.00

The median (1072) is similar to the mean (1157.43) for E.coli counts on MacConkey agar suggesting the distribution is not highly skewed.

print(summarytools::dfSummary(dat_e,valid.col=FALSE, graph.magnif=0.8, style="multiline"),method = "render")

Data Frame Summary

dat_e

Dimensions: 54 x 11
Duplicates: 0

Variable

Stats / Values

Freqs (% of Valid)

Graph

Missing

Sample number [numeric]

Mean (sd) : 27.5 (15.7)

min ≤ med ≤ max:

1 ≤ 27.5 ≤ 54

IQR (CV) : 26.5 (0.6)

54 distinct values

0 (0.0%)

Experiment_rep [factor]

1. 1

2. 2

3. 3

18	(	33.3%	)
18	(	33.3%	)
18	(	33.3%	)

0 (0.0%)

Bedding [factor]

1. Wood shavings

2. Sand

27	(	50.0%	)
27	(	50.0%	)

0 (0.0%)

Treatment [factor]

1. Control

2. Conditioner

3. Disinfectant

18	(	33.3%	)
18	(	33.3%	)
18	(	33.3%	)

0 (0.0%)

Replicate [factor]

1. 1

2. 2

3. 3

18	(	33.3%	)
18	(	33.3%	)
18	(	33.3%	)

0 (0.0%)

E.coli_count_MacA [numeric]

Mean (sd) : 355 (548.7)

min ≤ med ≤ max:

0 ≤ 42 ≤ 1712

IQR (CV) : 474 (1.5)

36 distinct values

0 (0.0%)

E.coli_count_Mac [numeric]

Mean (sd) : 1157.4 (1040.8)

min ≤ med ≤ max:

0 ≤ 1072 ≤ 2500

IQR (CV) : 2158.2 (0.9)

30 distinct values

0 (0.0%)

E.coli count SCAI [numeric]

1 distinct value

(

100.0%

)

0 (0.0%)

Total CFU MacA [numeric]

Mean (sd) : 650.1 (747.3)

min ≤ med ≤ max:

0 ≤ 250 ≤ 2312

IQR (CV) : 1290.8 (1.1)

42 distinct values

0 (0.0%)

Total CFU SCAI [numeric]

Mean (sd) : 40.1 (65.3)

min ≤ med ≤ max:

0 ≤ 0 ≤ 264

IQR (CV) : 70.5 (1.6)

18 distinct values

0 (0.0%)

Total CFU Mac [numeric]

Mean (sd) : 1151.6 (1038.6)

min ≤ med ≤ max:

0 ≤ 961 ≤ 2500

IQR (CV) : 2149 (0.9)

31 distinct values

1 (1.9%)

Generated by summarytools 1.1.4 (R version 4.3.0)
2026-01-28

dat_e %>%
  group_by(Bedding, Treatment) %>%
  summarise(
    mean_count = mean(E.coli_count_Mac, na.rm = TRUE),
    sd_count = sd(E.coli_count_Mac, na.rm = TRUE),
    mean_ecoli = mean(E.coli_count_Mac, na.rm = TRUE),
    sd_ecoli = sd(E.coli_count_Mac, na.rm = TRUE),
    n = n()
  )

## # A tibble: 6 × 7
## # Groups:   Bedding [2]
##   Bedding       Treatment    mean_count sd_count mean_ecoli sd_ecoli     n
##   <fct>         <fct>             <dbl>    <dbl>      <dbl>    <dbl> <int>
## 1 Wood shavings Control         1632.      580.     1632.      580.      9
## 2 Wood shavings Conditioner       94.7     145.       94.7     145.      9
## 3 Wood shavings Disinfectant     870.      943.      870.      943.      9
## 4 Sand          Control         2500         0      2500         0       9
## 5 Sand          Conditioner     1839.      528.     1839.      528.      9
## 6 Sand          Disinfectant       9.11     21.1       9.11     21.1     9

7.2 LMM (non-transformed)

me0 <- lmer(E.coli_count_Mac ~ Bedding * Treatment + (1 | Experiment_rep), data = dat_e, REML = TRUE)

# Normality
residual_ecoli <- residuals(me0)

shapiro.test(residual_ecoli)

## 
##  Shapiro-Wilk normality test
## 
## data:  residual_ecoli
## W = 0.89572, p-value = 0.0002026

# Equal variance 
leveneTest(E.coli_count_Mac ~ Bedding * Treatment, data = dat_e)

## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value   Pr(>F)   
## group  5  3.6099 0.007494 **
##       48                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Not normal (p < 0.05)
Not equal variances (p < 0.05).
See log transformation comparison below. QQ plots indicate that normality of residuals is improved with transformation so transformed data will be used in the model.

performance::check_model(me0)

7.2 LMM (log transformed)

m_e_log <- lmer(log(E.coli_count_Mac + 1) ~ Bedding * Treatment + (1 | Experiment_rep), data = dat_e, REML = TRUE)

residual_e <- residuals(m_e_log)

shapiro.test(residual_e)

## 
##  Shapiro-Wilk normality test
## 
## data:  residual_e
## W = 0.9258, p-value = 0.002496

residuals still not normal (p < 0.05). Check with QQ-plots.

dat_e$Group <- interaction(dat_e$Bedding, dat_e$Treatment)

leveneTest(log(E.coli_count_Mac + 1) ~ Group, data = dat_e)

## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value  Pr(>F)  
## group  5  2.1568 0.07458 .
##       48                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

equal variances across groups (p > 0.05)

performance::check_model(m_e_log)

7.3 Intraclass correlation coefficient

performance::icc(m_e_log)

## # Intraclass Correlation Coefficient
## 
##     Adjusted ICC: 0.215
##   Unadjusted ICC: 0.054

21.5% of variation in E.coli CFU is due to differences between experimental reps, indicating a low level of agreement in terms of how consistently the model distinguishes between groups.

7.4 Estimated marginal means (EMMs)

emm_e <- emmeans(m_e_log, ~ Treatment | Bedding, type = "response")
emm_df_e <- as.data.frame(emm_e)
emm_df_e

## Bedding = Wood shavings:
##  Treatment     response        SE   df lower.CL  upper.CL
##  Control      1507.0934  966.3010 6.59 324.1731  6993.261
##  Conditioner     6.0664    4.5278 6.59   0.5237    31.773
##  Disinfectant  362.4797  232.8972 6.59  77.3730  1684.752
## 
## Bedding = Sand:
##  Treatment     response        SE   df lower.CL  upper.CL
##  Control      2500.0000 1602.4995 6.59 538.2623 11598.181
##  Conditioner  1769.2404 1134.2700 6.59 380.6969  8209.051
##  Disinfectant    1.7068    1.7344 6.59  -0.4164    11.554
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## Intervals are back-transformed from the log(mu + 1) scale

These results describe the model-adjusted predictions for each conditioner within each bedding type.
Disinfectant resulted in the lowest predicted E.coli growth in sand, while conditioner produced the lowest growth in wood shavings.

7.5 Pairwise comparisons

pairs(emm_e, adjust = "tukey")

## Bedding = Wood shavings:
##  contrast                      ratio       SE df null t.ratio p.value
##  Control / Conditioner      213.4171 143.3035 46    1   7.987  <.0001
##  Control / Disinfectant       4.1490   2.7860 46    1   2.119  0.0971
##  Conditioner / Disinfectant   0.0194   0.0131 46    1  -5.868  <.0001
## 
## Bedding = Sand:
##  contrast                      ratio       SE df null t.ratio p.value
##  Control / Conditioner        1.4128   0.9487 46    1   0.515  0.8646
##  Control / Disinfectant     923.9718 620.4206 46    1  10.170  <.0001
##  Conditioner / Disinfectant 653.9993 439.1418 46    1   9.655  <.0001
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 3 estimates 
## Tests are performed on the log scale

In wood shavings, conditioner significantly reduced E. coli counts compared to the Control and disinfectant groups. Predicted control and disinfectant counts did not differ significantly.
In sand, disinfectant significantly reduced E. coli counts compared to the Control and conditioner groups. Predicted Control and conditioner counts did not differ significantly.

7.6 Model fixed effects

fixed_e <- tidy(m_e_log, effects = "fixed", conf.int = TRUE)
fixed_e

## # A tibble: 6 × 9
##   effect term     estimate std.error statistic    df  p.value conf.low conf.high
##   <chr>  <chr>       <dbl>     <dbl>     <dbl> <dbl>    <dbl>    <dbl>     <dbl>
## 1 fixed  (Interc…    7.32      0.641    11.4    6.59 1.38e- 5    5.78     8.85  
## 2 fixed  Bedding…    0.506     0.671     0.753 46.0  4.55e- 1   -0.846    1.86  
## 3 fixed  Treatme…   -5.36      0.671    -7.99  46.0  3.09e-10   -6.71    -4.01  
## 4 fixed  Treatme…   -1.42      0.671    -2.12  46.0  3.95e- 2   -2.77    -0.0713
## 5 fixed  Bedding…    5.02      0.950     5.28  46.0  3.37e- 6    3.11     6.93  
## 6 fixed  Bedding…   -5.41      0.950    -5.69  46.0  8.34e- 7   -7.32    -3.49

Using the wood shavings control group as the reference, predicted means (CFU) were:

1508.1 (Shavings:Control)
7.07 (Shavings:Conditioner)
363.5 (Shavings:Disinfectant)
2501 (Sand:Control)
1770 (Sand:Conditioner)
2.71 (Sand:Disinfectant)

# shared y range + breaks
y_max_all_e <- max(emm_df_e$upper.CL, na.rm = TRUE)
ylim_e      <- c(0, y_max_all_e * 1.35)
y_breaks_e  <- pretty(ylim_e, n = 6)

# relabel treatments for x axis
treatment_labels <- c(
  "Control"     = "Control",
  "ZorbiFresh"  = "Conditioner",
  "Doxall"      = "Disinfectant"
)

# Wood Shavings
emm_shavings_e <- subset(emm_df_e, Bedding == "Wood shavings")

p1_e <- ggplot(emm_shavings_e,
               aes(x = Treatment, y = response, fill = Treatment)) +
  geom_col(width = 0.6) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2) +
  geom_signif(xmin = 1, xmax = 2, annotations = "p < .0001",
              y_position = y_max_all_e * 1.15, tip_length = 0.01, textsize = 3.5) +
  geom_signif(xmin = 2, xmax = 3, annotations = "p < .0001",
              y_position = y_max_all_e * 1.25, tip_length = 0.01, textsize = 3.5) +
  labs(
    title = "Wood Shavings",
    x = "Treatment",
    y = expression("Predicted " * italic("E. coli") * " CFU")
  ) +
  scale_fill_grey(start = 0.4, end = 0.8) +
  scale_x_discrete(labels = treatment_labels) +
  theme_bw(base_size = 12) +
  theme(legend.position = "none") +
  scale_y_continuous(limits = ylim_e, breaks = y_breaks_e)

# Sand
emm_sand_e <- subset(emm_df_e, Bedding == "Sand")

p2_e <- ggplot(emm_sand_e,
               aes(x = Treatment, y = response, fill = Treatment)) +
  geom_col(width = 0.6) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2) +
  geom_signif(xmin = 1, xmax = 3, annotations = "p < .0001",
              y_position = y_max_all_e * 1.15, tip_length = 0.01, textsize = 3.5) +
  geom_signif(xmin = 2, xmax = 3, annotations = "p < .0001",
              y_position = y_max_all_e * 1.25, tip_length = 0.01, textsize = 3.5) +
  labs(
    title = "Sand",
    x = "Treatment",
    y = NULL
  ) +
  scale_fill_grey(start = 0.4, end = 0.8) +
  scale_x_discrete(labels = treatment_labels) +
  theme_bw(base_size = 12) +
  theme(
    legend.position = "none",
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  ) +
  scale_y_continuous(limits = ylim_e, breaks = y_breaks_e)

p1_e + p2_e

- This box and whisker plot compares Predicted E. coli CFU across bedding treatment groups, showing significant differences with brackets.

# shared y range + breaks
y_max_all_e <- max(emm_df_e$upper.CL, na.rm = TRUE)
ylim_e      <- c(0, y_max_all_e * 1.35)
y_breaks_e  <- pretty(ylim_e, n = 6)

treatment_labels <- c(
  "Control"     = "Control",
  "ZorbiFresh"  = "Conditioner",
  "Doxall"      = "Disinfectant"
)

base_theme <- theme_bw(base_size = 11) +
  theme(
    legend.position = "none",
    plot.margin = margin(5, 5, 5, 5)
  )

# Wood shavings
emm_shavings_e <- subset(emm_df_e, Bedding == "Wood shavings")

p1_e <- ggplot(emm_shavings_e,
       aes(x = Treatment, y = response)) +
  geom_point(size = 2.8) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.15) +
  geom_signif(xmin = 1, xmax = 2, annotations = "p < .0001",
              y_position = y_max_all_e * 1.15, tip_length = 0.01, textsize = 3.2) +
  geom_signif(xmin = 2, xmax = 3, annotations = "p < .0001",
              y_position = y_max_all_e * 1.25, tip_length = 0.01, textsize = 3.2) +
  labs(
    title = "Wood shavings",
    x = "Treatment",
    y = expression("Model-predicted " * italic("E. coli") * " colony count ")
  ) +
  scale_x_discrete(labels = treatment_labels) +
  scale_y_continuous(limits = ylim_e, breaks = y_breaks_e) +
  base_theme

# Sand
emm_sand_e <- subset(emm_df_e, Bedding == "Sand")

p2_e <- ggplot(emm_sand_e,
       aes(x = Treatment, y = response)) +
  geom_point(size = 2.8) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.15) +
  geom_signif(xmin = 1, xmax = 3, annotations = "p < .0001",
              y_position = y_max_all_e * 1.15, tip_length = 0.01, textsize = 3.2) +
  geom_signif(xmin = 2, xmax = 3, annotations = "p < .0001",
              y_position = y_max_all_e * 1.25, tip_length = 0.01, textsize = 3.2) +
  labs(
    title = "Sand",
    x = "Treatment",
    y = NULL
  ) +
  scale_x_discrete(labels = treatment_labels) +
  scale_y_continuous(limits = ylim_e, breaks = y_breaks_e) +
  base_theme +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )

p1_e + p2_e

This box and whisker plot compares model predicted E. coli mean colony counts across bedding treatment groups, showing significant differences with brackets.

Analysis Log – Bedding material determines treatment efficacy against Klebsiella pneumoniae

Gabriella Harb

October 26, 2025

Part 1: Media Selection

1. Packages and Data Import

2. Data Exploration

3. Conclusions

—

Part 2: Bedding Inoculation Model Development

1. Packages and Data Import

2. Data Exploration and Model Development

3. Conclusions

—

Part 3: Experimental Data Analysis

1. Overview

2. Packages and Data Import

3. Data Exploration

Data Frame Summary

dat

4. Linear Mixed Model (LMM) (non-transformed)

4.1 Comparing inclusion of random effect terms

4.2 Model assumptions

4.3 Model diagnostics

5. LMM (Log transformed)

5.1 Model assumptions

5.2 Model diagnostics

5.3 Intraclass correlation coefficient

5.4 Examine strains (Milk vs Faecal)

5.5 Estimated marginal means (EMMs)

5.6 Pairwise comparisons

5.7 Model fixed effects

6. Sensitivity analysis

6.1 Higher cap value for Klebsiella counts (5000)

6.2 Higher cap value for Klebsiella counts (10000)

6.3 Testing GLMM

7. E. coli model

7.1 Data import and exploration

Data Frame Summary

dat_e

7.2 LMM (non-transformed)

7.2 LMM (log transformed)

7.3 Intraclass correlation coefficient

7.4 Estimated marginal means (EMMs)

7.5 Pairwise comparisons

7.6 Model fixed effects