# Mixed-effects model
# load multiple libraries required for the analysis
# pacman::p_load() function can load multiple libraries with automatic installation

pacman::p_load(readxl, car, sjstats, jtools, tidyverse, lme4, lmerTest)

# Read the data using File > Import dataset

plum = read_excel('datasets.xlsx', sheet = 'anova', range = 'ac13:af45')
head(plum)
## # A tibble: 6 × 4
##   Replication Irrigation Variety Yield
##         <dbl> <chr>      <chr>   <dbl>
## 1           1 control    A         8.9
## 2           1 control    B         9.5
## 3           1 control    C        11.7
## 4           1 control    D        15  
## 5           1 new        A        16.7
## 6           1 new        B        14.6
factors = c('Replication', 'Irrigation', 'Variety')

plum[factors] = lapply(plum[factors], as.factor)

table(plum$Irrigation, plum$Variety)
##          
##           A B C D
##   control 4 4 4 4
##   new     4 4 4 4
ggplot(plum, aes(x = Variety, y = Replication, color = Replication)) +
  geom_tile() +
  facet_grid(~ Irrigation) 

# Fit anova
fixed = aov(Yield ~ Replication + Irrigation * Variety, data = plum)

# Replication and interactions are non-significant

fixed = aov(Yield ~ Irrigation + Variety, plum)
summary(fixed)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## Irrigation   1 192.08  192.08   90.50 4.10e-10 ***
## Variety      3  96.43   32.14   15.14 5.64e-06 ***
## Residuals   27  57.31    2.12                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mixed = lmer(Yield ~ Irrigation + Variety + (1|Irrigation), data = plum)
## Warning in as_lmerModLT(model, devfun): Model may not have converged with 1
## eigenvalue close to zero: 5.7e-10
summary(mixed)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Yield ~ Irrigation + Variety + (1 | Irrigation)
##    Data: plum
## 
## REML criterion at convergence: 107.3
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.73316 -0.53196  0.05577  0.49764  2.31659 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  Irrigation (Intercept) 8.121    2.850   
##  Residual               2.122    1.457   
## Number of obs: 32, groups:  Irrigation, 2
## 
## Fixed effects:
##               Estimate Std. Error      df t value Pr(>|t|)    
## (Intercept)    10.9250     2.9074 27.0000   3.758 0.000837 ***
## Irrigationnew   4.9000     4.0630 27.0000   1.206 0.238275    
## VarietyB       -1.8000     0.7284 27.0000  -2.471 0.020074 *  
## VarietyC        1.0875     0.7284 27.0000   1.493 0.147053    
## VarietyD        2.9875     0.7284 27.0000   4.101 0.000338 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Irrgtn VartyB VartyC
## Irrigatinnw -0.699                     
## VarietyB    -0.125  0.000              
## VarietyC    -0.125  0.000  0.500       
## VarietyD    -0.125  0.000  0.500  0.500
# random part
8.121 / (8.121+2.122) * 100
## [1] 79.28341
# 79.28% of the variation in the random part (not explained by the fixed part) explained by irrigation

# fixed effect R2
jtools::summ(mixed)
Observations 32
Dependent variable Yield
Type Mixed effects linear regression
AIC 121.34
BIC 131.60
Pseudo-R² (fixed effects) 0.48
Pseudo-R² (total) 0.89
Fixed Effects
Est. S.E. t val. d.f. p
(Intercept) 10.92 2.91 3.76 27.00 0.00
Irrigationnew 4.90 4.06 1.21 27.00 0.24
VarietyB -1.80 0.73 -2.47 27.00 0.02
VarietyC 1.09 0.73 1.49 27.00 0.15
VarietyD 2.99 0.73 4.10 27.00 0.00
p values calculated using Satterthwaite d.f.
Random Effects
Group Parameter Std. Dev.
Irrigation (Intercept) 2.85
Residual 1.46
Grouping Variables
Group # groups ICC
Irrigation 2 0.79
# 48% explained by the fixed part (not exactly like the R2)
# 52% unexplained

# anova of mixed model
anova(mixed)
## Type III Analysis of Variance Table with Satterthwaite's method
##            Sum Sq Mean Sq NumDF DenDF F value    Pr(>F)    
## Irrigation  3.087   3.087     1    27  1.4545    0.2383    
## Variety    96.431  32.144     3    27 15.1443 5.637e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# comparing the fixed and mixed model
anova(mixed, fixed)
## refitting model(s) with ML (instead of REML)
## Data: plum
## Models:
## fixed: Yield ~ Irrigation + Variety
## mixed: Yield ~ Irrigation + Variety + (1 | Irrigation)
##       npar    AIC    BIC  logLik deviance Chisq Df Pr(>Chisq)
## fixed    6 121.46 130.25 -54.729   109.46                    
## mixed    7 123.46 133.72 -54.729   109.46     0  1          1
# AIC (Akaike Information Criteria), BIC (Bayesian Information Criteria) == loss information
# Lower AIC or BIC is better
# Is the difference between the AIC or BIC of fixed and mixed models are significant?
# p > 0.05, sufficient evidence to support the H0 (fixed = mixed)
# Principle of parsimony: simple is better
# fixed model (usual anova) should be used.