Load libraries.

library(gridExtra)
library(ggplot2)
library(dplyr)
library(tidyverse)
library(ggfortify)
library(readxl) # package to read .xls files

# Packages for raincloud plots
library(ggdist)      # for stat_halfeye()
library(ggbeeswarm)  # for nicer jitter

# Packages for modeling
library(lme4)
library(car)
library(arm)
library(lmerTest)
library(ggeffects)
library(emmeans)

Question 1: interactions and mixed-effects modelling

Load lifespan dataset and check the data.

# Read the "Newts" sheet
lifespan <- read.csv("/Users/joylynngoh/Files/Uni/Y4/ES3307/Assignment 3/Assignment 3 data/lifespan.csv")


# Quick check of data structure
head(lifespan)
##   year Male.ID IR.treatment population origin   date lifespan line
## 1 2015     117         20Gy         C1    A30 28-Dec       11    1
## 2 2015      99         20Gy         C1    A30 28-Dec       12    1
## 3 2015     104         Ctrl         C1    A30 28-Dec       14    1
## 4 2015     109         Ctrl         C1    A30 28-Dec       14    1
## 5 2015     108         Ctrl         C1    A30 28-Dec       14    1
## 6 2015     100         Ctrl         C1    A30 28-Dec       14    1
str(lifespan)
## 'data.frame':    1049 obs. of  8 variables:
##  $ year        : int  2015 2015 2015 2015 2015 2015 2015 2015 2015 2015 ...
##  $ Male.ID     : chr  "117" "99" "104" "109" ...
##  $ IR.treatment: chr  "20Gy" "20Gy" "Ctrl" "Ctrl" ...
##  $ population  : chr  "C1" "C1" "C1" "C1" ...
##  $ origin      : chr  "A30" "A30" "A30" "A30" ...
##  $ date        : chr  "28-Dec" "28-Dec" "28-Dec" "28-Dec" ...
##  $ lifespan    : int  11 12 14 14 14 14 14 14 14 16 ...
##  $ line        : int  1 1 1 1 1 1 1 1 1 1 ...

Research question: how is longevity affected by preadaptation and temperature? Do these effects vary with year?

Factorise line, year, origin, population.

lifespan <- lifespan %>%
  mutate(across(c(year, origin, line, population), as.factor))

str(lifespan)
## 'data.frame':    1049 obs. of  8 variables:
##  $ year        : Factor w/ 2 levels "2015","2016": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Male.ID     : chr  "117" "99" "104" "109" ...
##  $ IR.treatment: chr  "20Gy" "20Gy" "Ctrl" "Ctrl" ...
##  $ population  : Factor w/ 6 levels "C1","C2","C3",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ origin      : Factor w/ 3 levels "A30","A36","P36": 1 1 1 1 1 1 1 1 1 1 ...
##  $ date        : chr  "28-Dec" "28-Dec" "28-Dec" "28-Dec" ...
##  $ lifespan    : int  11 12 14 14 14 14 14 14 14 16 ...
##  $ line        : Factor w/ 9 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...

1A. Draw out the experiment’s design and identify nested/crossed structures in the data.

Crossed fixed factors are year × origin. There is partial crossing and partial nesting for population within origin (C1, C2, C3 are crossed between A30 and A36 while T1, T3, T4 are nested within P36).

Q1A experiment design
Q1A experiment design

1B. Plot the raw data to show how the lifespan of beetles varies with origin, population, and year.

# Raincloud plot
ggplot(lifespan, aes(x = origin, y = lifespan, fill = population)) +
  # boxplot (median + IQR)
  geom_boxplot(width = 0.55, outlier.shape = NA,
               position = position_dodge(1)) +
  # raw points (population shown via colour)
  geom_point(position = position_jitterdodge(jitter.width = 0.15, 
                                    jitter.height = 0.25,
                                    dodge.width  = 0.9),
    alpha = 0.45, size = 0.3) +
  facet_wrap(~ year) +
  labs(
    x = "Origin (treatment)",
    y = "Lifespan (days)",
    color = "Population",
    title = "Fig 1A: Beetle lifespan by origin, population, and year"
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "right")

From Fig. 1A, the higher temperature lines (A36) have lower lifespan than the lower temperature lines (A30) and the pre-adapted lines seem to be closer in lifespan to the lower temperature lines. There does not seem to be an obvious difference between 2015 and 2016 population lifespans, but then again, the variance across all the populations and years is quite broad, so none of the lines or years seem significantly different from each other.

1C. Plot an interaction plot between origin and year.

lifespan_MeanSE <- lifespan %>%
  group_by(origin, year) %>%
  summarise(
    mean = mean(lifespan, na.rm = TRUE),
    se = sd(lifespan, na.rm = TRUE) / sqrt(n())
  )

# interaction plot
ggplot(lifespan_MeanSE, aes(x = year, y = mean, group = origin, 
                            color = factor(origin))) +
  geom_line(linewidth = 0.5) +
  geom_point(size = 1) +
  geom_errorbar(aes(ymin = mean - se, ymax = mean + se),
                width = 0.05, linewidth = 0.5) +
  labs(x = "Year", y = "Mean lifespan (days)",
       color = "Origin",
       title = "Fig 1B: Interaction plot of origin × year") +
  theme_minimal()

Based on Fig. 1B, there seems to be strong evidence that the low temperature (A30) lines and preadaptation line (P36) have longer lifespan as compared to no preadaptation, higher temperature (A36). Exposing the beetles to these conditions for 15 generations longer (2016 vs 2015) also seems to significantly increase lifespan for A30 and P36 but the effect seems to be limited for A36.

1D. Use lmer to fit an appropriate random intercept model.

options(show.signif.stars = FALSE)
model1.1 <- lmer(lifespan ~ origin * year + (1 | population), 
                 data = lifespan)

autoplot(lm(lifespan$lifespan ~ lifespan$origin*lifespan$year),
         which = c(1:3, 5), ncol = 2, label.size = 3, label.repel = TRUE) + 
  theme_minimal()

The four plots show there is general homoscedasticity, normality and linearity, and no heavily influential outliers. The residuals are stratified because both predictors origin and year are categorical variables, so the fitted values are the estimated mean per level of each combination of origin and year, while the residuals are within-group variations. Overall, the model seems to be an acceptable fit.

1E. Refitting model, comparing models. Do you need to keep the interaction term or not?

model1.2 <- lmer(lifespan ~ origin * year + (1 | population), 
                 data = lifespan, REML = F)

model1.3 <- lmer(lifespan ~ origin + year + (1 | population), 
                 data = lifespan, REML = F)

anova(model1.2, model1.3, refit = F)
## Data: lifespan
## Models:
## model1.3: lifespan ~ origin + year + (1 | population)
## model1.2: lifespan ~ origin * year + (1 | population)
##          npar    AIC    BIC  logLik -2*log(L)  Chisq Df Pr(>Chisq)
## model1.3    6 5520.2 5549.9 -2754.1    5508.2                     
## model1.2    8 5495.7 5535.4 -2739.8    5479.7 28.479  2  6.543e-07

Does including the origin × year interaction significantly improve model fit? \(p < 0.0001\). There is extremely strong evidence that interaction improves model fit. The interaction term is must be kept.

1F. Refit your selected model using REML = TRUE

model1.4 <- lmer(lifespan ~ origin * year + (1 | population), 
                 data = lifespan, REML = T)

1G. What does the summary table tell about the importance of random effects?

summary(model1.4)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: lifespan ~ origin * year + (1 | population)
##    Data: lifespan
## 
## REML criterion at convergence: 5480.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.0126 -0.5977 -0.0044  0.6512  3.9868 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  population (Intercept)  0.9284  0.9635  
##  Residual               10.7651  3.2810  
## Number of obs: 1049, groups:  population, 6
## 
## Fixed effects:
##                      Estimate Std. Error         df t value Pr(>|t|)
## (Intercept)          15.24401    0.61645    5.35687  24.729 9.95e-07
## originA36            -3.02709    0.36313 1040.17284  -8.336 2.41e-16
## originP36            -1.15280    0.86670    5.23283  -1.330    0.239
## year2016              2.02874    0.35858 1039.10745   5.658 1.98e-08
## originA36:year2016   -2.35309    0.51359 1039.22521  -4.582 5.17e-06
## originP36:year2016    0.03744    0.49103 1039.44397   0.076    0.939
## 
## Correlation of Fixed Effects:
##             (Intr) orgA36 orgP36 yr2016 oA36:2
## originA36   -0.316                            
## originP36   -0.711  0.225                     
## year2016    -0.318  0.541  0.226              
## orgA36:2016  0.222 -0.703 -0.158 -0.698       
## orgP36:2016  0.232 -0.395 -0.310 -0.730  0.510

The variance of the random effect is 1.318. Population random effect constitutes:

0.9274/(0.9274+10.7651)
## [1] 0.0793158

about 7.93% of remaining variance after the fixed effects are accounted for. This tells me that the random effect is not that important to be included in the model.

1H. Look at the ANOVA table. Report the appropriate F, dfs, and p-values, and interpret them.

anova(model1.4)
## Type III Analysis of Variance Table with Satterthwaite's method
##              Sum Sq Mean Sq NumDF   DenDF F value    Pr(>F)
## origin      2865.28 1432.64     2    6.64 133.082 4.383e-06
## year         406.95  406.95     1 1039.34  37.803 1.114e-09
## origin:year  310.72  155.36     2 1039.40  14.432 6.575e-07

Since the interaction is statistically significant, I will only report the origin:year stats. There is very strong evidence that the interaction of origin and year (ie. the interaction between temperature and pre-adaptation conditions, and the length of time allowed for adaptation), affects the lifespan of the beetles (\(F_{2,1039} = 14.4, \ P < 0.0001\)). Fig. 1B confirms the results with direction: beetle lifespans are significantly higher at low temperature and even higher when there are more generations for beetles to adapt.

1I. Plot the model estimated means and intervals for your final model.

predict1 <- ggpredict(model1.4, terms = c("origin", "year"), type = "random")
predictPlot <- plot(predict1)
predictPlot + labs(title = "Fig. 1C: Estimated means and intervals for origin, year, and population")

Since the random effect of population is not significantly influential on the model, I chose to remove it for Fig. 1C. Now, A30 and P36 are significantly visually different from A36, but A30 and P36 are not significantly different from each other. Additionally, 2016 lifespans are moderately significantly higher than 2015 lifespans except for A36, where it seems that exposing the beetles to higher temperature line for more generations decreased their lifespan instead. These conclusions match up with Fig. 1B.


Question 2: repeated measures and nested analyses

Load mixed models data dataset and check the data.

# Read the "Newts" sheet
soil <- read_excel(path = '/Users/joylynngoh/Files/Uni/Y4/ES3307/Assignment 3/Assignment 3 data/Mixed models data.xlsx')


# Quick check of data structure
head(soil)
## # A tibble: 6 × 3
##   TREATMNT PLANT DENSITY
##      <dbl> <dbl>   <dbl>
## 1        1     1    87  
## 2        1     1   106. 
## 3        1     1    63.7
## 4        1     1    60.1
## 5        1     1    66.1
## 6        1     1    37.9
str(soil)
## tibble [60 × 3] (S3: tbl_df/tbl/data.frame)
##  $ TREATMNT: num [1:60] 1 1 1 1 1 1 1 1 1 1 ...
##  $ PLANT   : num [1:60] 1 1 1 1 1 1 1 1 1 1 ...
##  $ DENSITY : num [1:60] 87 106.1 63.7 60.1 66.1 ...

Factorise TREATMNT and PLANT.

soil <- soil %>%
  mutate(across(c(TREATMNT, PLANT), as.factor))
str(soil)
## tibble [60 × 3] (S3: tbl_df/tbl/data.frame)
##  $ TREATMNT: Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ...
##  $ PLANT   : Factor w/ 3 levels "1","2","3": 1 1 1 1 1 1 1 1 1 1 ...
##  $ DENSITY : num [1:60] 87 106.1 63.7 60.1 66.1 ...

PLANT should be numbered uniquely from 1 to 6 but this will be dealt with later (initially I recoded the variable before starting on 2A but moved the recoding below to question 2C).

2A. Plot the raw data to show how density varies with treatment and plant.

ggplot(soil, aes(x = TREATMNT, y = DENSITY, fill = PLANT)) +
  geom_boxplot() + 
  theme_minimal() +
  labs(title = "Fig. 2A: Raw data density by treatment")

From Fig. 2A, it seems that treatment 2 produces higher average bacterial density than treatment 1, and plant 3 has the highest average density and plant 1 the lowest average density. However, it is also hard to say whether the means are significantly different, given that variation within plant measurements is also high. There are also several outliers.

2B. Fit the model DENSITY ~ TREATMNT, and interpret it on the assumption it is a reasonable analysis to perform.

model2.1 <- lm(DENSITY~TREATMNT, data = soil)
anova(model2.1) # to compare F-values and p-values
## Analysis of Variance Table
## 
## Response: DENSITY
##           Df Sum Sq Mean Sq F value    Pr(>F)
## TREATMNT   1   9543  9543.2  12.504 0.0008059
## Residuals 58  44266   763.2
display(model2.1) # to compare fitted coefficients
## lm(formula = DENSITY ~ TREATMNT, data = soil)
##             coef.est coef.se
## (Intercept) 85.41     5.04  
## TREATMNT2   25.22     7.13  
## ---
## n = 60, k = 2
## residual sd = 27.63, R-Squared = 0.18

Assuming that this is a reasonable analysis (and that it’s reasonable to ignore PLANT as a blocking factor), there is very strong evidence that TREATMNT positively affects DENSITY (\(F_{1,58} = 12.5, \ P = 0.0008\)), where soil type 2 produces higher bacterial density than soil type 1, according to the display function.

autoplot(lm(soil$DENSITY ~ soil$TREATMNT),
         which = c(1:3, 5), ncol = 2, label.size = 3, label.repel = TRUE) + 
  theme_minimal()

Testing the model fit, it seems that there is no heteroscedasticity, and the residuals seem to be quite linear and normal. No influential outliers either.

2C. Fit a model that treats plant as a random factor. Interpret this second analysis, emphasising any difference in the conclusions.

When PLANT is numbered 1 to 3 in each treatment, R treats PLANT as crossed with TREATMNT when it is actually nested within TREATMNT. Renumber plants 1-3 under treatment 2 as 4-6. I do not need to replot the data since the only difference would be the numbers in the x-axis for PLANT.

soil <- soil %>%
  mutate(PLANT_RENUM = case_when(
    TREATMNT == 1 ~ as.numeric(as.character(PLANT)),
    TREATMNT == 2 ~ as.numeric(as.character(PLANT)) + 3
  ))

model2.2 <- lmer(DENSITY~TREATMNT + (1|PLANT_RENUM), data = soil)
anova(model2.2)
## Type III Analysis of Variance Table with Satterthwaite's method
##          Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## TREATMNT 1410.6  1410.6     1     4  2.5831 0.1833
display(model2.2)
## lmer(formula = DENSITY ~ TREATMNT + (1 | PLANT_RENUM), data = soil)
##             coef.est coef.se
## (Intercept) 85.41    11.10  
## TREATMNT2   25.22    15.69  
## 
## Error terms:
##  Groups      Name        Std.Dev.
##  PLANT_RENUM (Intercept) 17.74   
##  Residual                23.37   
## ---
## number of obs: 60, groups: PLANT_RENUM, 6
## AIC = 552.6, DIC = 570.3
## deviance = 557.5

From the analysis, I observe that there is weak evidence that the soil treatments produce different bacterial densities (\(F_{1,4} = 2.58, \ P = 0.183\)).

What has changed? First, the denominator degrees of freedom has dramatically deflated with proper nesting of PLANT in the model. Secondly, the F-value has decreased and the p-value shows that there is weak evidence that treatment 1 results in higher bacterial density than in treatment 2. These results how seem to reflect Fig. 2B much more accurately.

The difference is that when PLANT was not included in the model (and was improperly crossed with TREATMNT), I am led to conclude that there is strong evidence to show that treatment affects density (in 2B), but when PLANT was included as a random blocking factor, I conclude that treatment does not statistically significantly affect density.

2D. Why can you not fit a model with a random slope and intercept model to this data?

First, TREATMNT is not a continuous variable, which means it does not vary continuously with DENSITY. With only 2 levels, the calculated slope will not be accurate.

Secondly, there are not enough replicates at the plant (random effect) level to fit a random slope AND intercept. There are only 6 plants, and each belong to only 1 treatment. To include a random slope, each plant must be observed under both treatments, and far more replication on the same plant is needed to estimate within-plant treatment slope.

2E. For your fixed effects, report the appropriate F, dfs, and p-values from the ANOVA table. Report the importance of your random effects. Interpret your results.

From the analysis in 2C, I observe that there is weak evidence that treatment 1 results in higher bacterial density than in treatment 2 (\(F_{1,4} = 2.58, \ P = 0.183\)). Next, I report the importance of random effects by calculating percentage variation from the summary() table.

summary(model2.2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: DENSITY ~ TREATMNT + (1 | PLANT_RENUM)
##    Data: soil
## 
## REML criterion at convergence: 544.6
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.57742 -0.50114  0.03571  0.68858  1.76106 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  PLANT_RENUM (Intercept) 314.8    17.74   
##  Residual                546.1    23.37   
## Number of obs: 60, groups:  PLANT_RENUM, 6
## 
## Fixed effects:
##             Estimate Std. Error    df t value Pr(>|t|)
## (Intercept)    85.41      11.10  4.00   7.697  0.00153
## TREATMNT2      25.22      15.69  4.00   1.607  0.18329
## 
## Correlation of Fixed Effects:
##           (Intr)
## TREATMNT2 -0.707

From the summary table, PLANT contributes to:

314.8/(314.8 + 546.1)
## [1] 0.3656638

36.6% of total variance. This is quite a significant proportion of variance that the random effect accounts for, and would lead me to think that PLANT should be included as a fixed effect, and the study should consider other factors that might be affecting bacterial density.

2F. Plot the model estimated means and intervals for the model in 2C.

emm2 <- as.data.frame(emmeans(model2.2, ~ TREATMNT)) # this averages over the random effects, ie. PLANT.

ggplot(emm2, aes(x = TREATMNT, y = emmean)) +
  geom_point(size = 2) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.1) +
  labs(x = "Soil treatment", y = "Estimated mean density",
       title = "Fig. 2B: Model estimated means (95% CI) by soil treatment") +
  theme_minimal()

From Fig. 2D, the 95% confidence intervals show that there is weak visual evidence that soil type 2 produces higher mean bacterial density than soil type 1. This confirms the results and conclusions from above.

Question 3: split-plot designs and linear mixed-effects models

3A. In this design, identify the fixed and random effects. Explain the difference between fixed and random effects.

The random effects is Block. Blocks are randomised to all possible field conditions so that the results can be generalizable outside the study (in case certain configurations of nitrogen concentrations and oat varieties might somehow affect adjacent plots’ yields). The fixed effects are nitro and Variety.

Fixed effects are the focal predictor variables of the experiment whose effects the study is interested in, while random effects are effects that the study is not interested in, but that may potentially influence (confound) the results and thus need to be accounted for in the model.

3B. Is this study’s design crossed or nested? Explain the difference between crossed and nested designs.

This study design is both crossed and nested. First, all levels of nitro are crossed with all levels of Variety, but each of the 4 nitro levels is also nested within each of the 3 Variety levels. Second, each of the Variety levels is crossed in each of the 6 Block (all the oat varieties appear in all of the 6 blocks).

Crossed designs are where all levels of a factor experience all levels of another factor(s). Nested designs are where all levels of one factor only experience one level of another factor.

3C. Write out your full model including any interactions and the random effect structure.

The full model is Yield ~ nitro * Variety + (1|Block/Variety). Another alternative is to exclude Variety in the random effect structure: Yield ~ nitro * Variety + (1|Block).

3D. What are the number of replicates for each of the effects?

Variety: each level of oat variety is replicated 6 times in total over the 6 blocks.

Nitrogen fertilizer: there are 18 replicates of each nitrogen concentration over the 6 blocks.

Question 4: power analysis

4A. Final experimental design

mean_treated_greenhouses = 10.442 # the larger the effect size, the higher power will be. increasing effect size, keeping everything else constant, by 0.3 increases power to nearly 1. 
mean_control_greenhouses = 10.142
greenhouse_variation = 0.153
plant_variation = 0.5 # halving plant variation, keeping all other factors constant, improves power by 0.03.
number_of_treated_greenhouses = 5
number_of_control_greenhouses = 5
number_of_plants = 100

4B. Justify why the above experimental design was selected.

I choose to assume that realistically, I cannot control the means or variations of the greenhouses and plants. I also want to keep the number of treated and control greenhouses equal so the analysis can be orthogonal, and realistically, it is probably not feasible to use more than 5 greenhouses per treatment type (ie. the only variable I choose to change is the number of plants). I choose to aim for the widely accepted threshold of 80% power, which will guarantee that we expect to miss only 20% of true effects; however, I understand that in reality, a power of 70% is acceptable and more realistic for most ecological field studies. Since the goal is to maximise power with the fewest expensive factors (greenhouses), the above combination of 5 treated and 5 control greenhouses with 100 plants in each gives me a power of 73% (result is shown below) which is reasonable.

4C. Run a single simulation with your selected experimental design and analyse how tomato yield differs between treated and control greenhouses. Report and interpret your results.

# Code copied over from tutorial
results = vector()
correct = 0
runs = 10000
set.seed(42)

variation = sqrt(greenhouse_variation^2 + ((plant_variation^2)/number_of_plants))

for(i in 1:runs){
  treated <- rnorm(number_of_treated_greenhouses, mean =
                     mean_treated_greenhouses, sd = variation)
  control <- rnorm(number_of_control_greenhouses, mean =
                     mean_control_greenhouses, sd = variation)

  model4.1 <- t.test(treated, control, var.equal = TRUE)

  p <- model4.1$p.value
  if (p<0.05) correct = correct + 1
  results[i] <- p
  }

(power = correct/runs)
## [1] 0.7259

Since power is reasonably above the minimum 70% threshold, I will proceed with the analysis.

set.seed(42)
variation1 = sqrt(greenhouse_variation^2 + ((plant_variation^2)/number_of_plants))

treated1 <- rnorm(number_of_treated_greenhouses, mean =
                    mean_treated_greenhouses, sd = variation1)
control1 <- rnorm(number_of_control_greenhouses, mean =
                    mean_control_greenhouses, sd = variation1)


t.test(treated1, control1)
## 
##  Welch Two Sample t-test
## 
## data:  treated1 and control1
## t = 2.9741, df = 7.0025, p-value = 0.02068
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.05449848 0.47725793
## sample estimates:
## mean of x mean of y 
##  10.51303  10.24716

Using the t-test used in the looped simulation, I observe that there is moderately strong evidence that applying Chalcid wasps to the fertiliser increases tomato yield (\(t_{7} = 2.97, \ P = 0.0207\)).

4D. Based on the analysis in 4C, plot the model estimated means and intervals.

# Combine data into one dataframe
results4 <- data.frame(Treated = treated1, Control = control1)

# Calculate group means and 95% CI
results4 <- pivot_longer(
  results4,
  cols = c(Treated, Control),
  names_to = "Treatment",
  values_to = "Value"
)

MEM4 <- results4 %>%
  group_by(Treatment) %>%
  summarise(
    Mean = mean(Value),
    SE = sd(Value)/sqrt(n()),     
    lower_CI = Mean - qt(0.975, df = n()-1) * SE,
    upper_CI = Mean + qt(0.975, df = n()-1) * SE
  )

# Plot
ggplot(MEM4, aes(x = Treatment, y = Mean, color = Treatment)) +
  geom_point(size = 4) +
  geom_errorbar(aes(ymin = lower_CI, ymax = upper_CI), width = 0.1, linewidth = 1) +
  labs(
    title = "Fig. 4A: Model-Estimated Means ± 95% CI",
    x = "Treatment",
    y = "Yield (kg)"
  ) +
  theme_minimal() +
  theme(legend.position = "none")