mixed.R

# Mixed-effects model
# load multiple libraries required for the analysis
# pacman::p_load() function can load multiple libraries with automatic installation

pacman::p_load(readxl, car, sjstats, jtools, tidyverse, lme4, lmerTest)

# Read the data using File > Import dataset

plum = read_excel('datasets.xlsx', sheet = 'anova', range = 'ac13:af45')
head(plum)

## # A tibble: 6 × 4
##   Replication Irrigation Variety Yield
##         <dbl> <chr>      <chr>   <dbl>
## 1           1 control    A         8.9
## 2           1 control    B         9.5
## 3           1 control    C        11.7
## 4           1 control    D        15  
## 5           1 new        A        16.7
## 6           1 new        B        14.6

factors = c('Replication', 'Irrigation', 'Variety')

plum[factors] = lapply(plum[factors], as.factor)

table(plum$Irrigation, plum$Variety)

##          
##           A B C D
##   control 4 4 4 4
##   new     4 4 4 4

ggplot(plum, aes(x = Variety, y = Replication, color = Replication)) +
  geom_tile() +
  facet_grid(~ Irrigation)

# Fit anova
fixed = aov(Yield ~ Replication + Irrigation * Variety, data = plum)

# Replication and interactions are non-significant

fixed = aov(Yield ~ Irrigation + Variety, plum)
summary(fixed)

##             Df Sum Sq Mean Sq F value   Pr(>F)    
## Irrigation   1 192.08  192.08   90.50 4.10e-10 ***
## Variety      3  96.43   32.14   15.14 5.64e-06 ***
## Residuals   27  57.31    2.12                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

mixed = lmer(Yield ~ Irrigation + Variety + (1|Irrigation), data = plum)

## Warning in as_lmerModLT(model, devfun): Model may not have converged with 1
## eigenvalue close to zero: 5.7e-10

summary(mixed)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Yield ~ Irrigation + Variety + (1 | Irrigation)
##    Data: plum
## 
## REML criterion at convergence: 107.3
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.73316 -0.53196  0.05577  0.49764  2.31659 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  Irrigation (Intercept) 8.121    2.850   
##  Residual               2.122    1.457   
## Number of obs: 32, groups:  Irrigation, 2
## 
## Fixed effects:
##               Estimate Std. Error      df t value Pr(>|t|)    
## (Intercept)    10.9250     2.9074 27.0000   3.758 0.000837 ***
## Irrigationnew   4.9000     4.0630 27.0000   1.206 0.238275    
## VarietyB       -1.8000     0.7284 27.0000  -2.471 0.020074 *  
## VarietyC        1.0875     0.7284 27.0000   1.493 0.147053    
## VarietyD        2.9875     0.7284 27.0000   4.101 0.000338 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Irrgtn VartyB VartyC
## Irrigatinnw -0.699                     
## VarietyB    -0.125  0.000              
## VarietyC    -0.125  0.000  0.500       
## VarietyD    -0.125  0.000  0.500  0.500

# random part
8.121 / (8.121+2.122) * 100

## [1] 79.28341

# 79.28% of the variation in the random part (not explained by the fixed part) explained by irrigation

# fixed effect R2
jtools::summ(mixed)

Observations	32
Dependent variable	Yield
Type	Mixed effects linear regression

AIC	121.34
BIC	131.60
Pseudo-R² (fixed effects)	0.48
Pseudo-R² (total)	0.89

Fixed Effects
	Est.	S.E.	t val.	d.f.	p
(Intercept)	10.92	2.91	3.76	27.00	0.00
Irrigationnew	4.90	4.06	1.21	27.00	0.24
VarietyB	-1.80	0.73	-2.47	27.00	0.02
VarietyC	1.09	0.73	1.49	27.00	0.15
VarietyD	2.99	0.73	4.10	27.00	0.00
p values calculated using Satterthwaite d.f.

Random Effects
Group	Parameter	Std. Dev.
Irrigation	(Intercept)	2.85
Residual		1.46

Grouping Variables
Group	# groups	ICC
Irrigation	2	0.79

# 48% explained by the fixed part (not exactly like the R2)
# 52% unexplained

# anova of mixed model
anova(mixed)

## Type III Analysis of Variance Table with Satterthwaite's method
##            Sum Sq Mean Sq NumDF DenDF F value    Pr(>F)    
## Irrigation  3.087   3.087     1    27  1.4545    0.2383    
## Variety    96.431  32.144     3    27 15.1443 5.637e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# comparing the fixed and mixed model
anova(mixed, fixed)

## refitting model(s) with ML (instead of REML)

## Data: plum
## Models:
## fixed: Yield ~ Irrigation + Variety
## mixed: Yield ~ Irrigation + Variety + (1 | Irrigation)
##       npar    AIC    BIC  logLik deviance Chisq Df Pr(>Chisq)
## fixed    6 121.46 130.25 -54.729   109.46                    
## mixed    7 123.46 133.72 -54.729   109.46     0  1          1

# AIC (Akaike Information Criteria), BIC (Bayesian Information Criteria) == loss information
# Lower AIC or BIC is better
# Is the difference between the AIC or BIC of fixed and mixed models are significant?
# p > 0.05, sufficient evidence to support the H0 (fixed = mixed)
# Principle of parsimony: simple is better
# fixed model (usual anova) should be used.

mixed.R

hp

2025-02-23