R square in GLMM

R square is a widely used measure of model fitness, in General Linear Models (GLM) it can be interpreted as the percent of variance in the response variable explained by the model. This measure is unitless which makes it useful to compare model between studies in meta-analysis analysis. Generalized Linear Mixed models (GLMM) are extending GLM by including an hierarchical structure in the model, indeed GLMs assume that every observation are independent from each other. In biological studies this assumption is often violated under certain experimental design, for example in repeated measurement scheme (several sample of a similar unit over time), or in field studies with Block design to account for some natural variation in soil gradient. Therefore GLMM are becoming a popular technique among biologist to account for the multilevel structure in their dataset, see Bolker et al (2009) Trends in Ecology and Evolution. However these models due to their various variance terms (ie variance at the block level, variance at the observation level) make the computation of R square problematic. A recent article by Nakagawa and Schielzeth (2013, Methods in Ecology and Evolution) present a new technique to derive R square for these model.

Below is some R-code to derive these values for gaussian, binomial and poisson GLMM, the R-code for the computation of the R square was taken from the appendix of the article. To help the comprehension let us imagine that the data correspond to some field study where was recorded in 8 plots: plant biomass (gaussian), presence of Oak species (binary/binomial), number of caught coleopterans (poisson), we think that these variables are dependent to the temperature and the vegetation type in the plot. In this exemple we have one random factor: the plots, two explanatory variable: one continuous, the temperature and one factor, the vegetation type.

# code for computing marginal and conditional R-square for gaussian,
# binomial and poisson GLMM the code for computing the R-square is taken
# from Nakagawa and Schielzeth 2013 MEE
library(lme4)

## Loading required package: lattice
## Loading required package: Matrix

library(arm)

## Loading required package: MASS
## 
## arm (Version 1.6-10, built: 2013-11-15)
## 
## Working directory is /home/lionel/Desktop/Blog

# making some simulation for gaussian data (plant biomass) depending on one
# continuous variable, one factor and one random intercept effect
temp <- runif(200, 0, 10)
veg_type <- gl(n = 4, k = 50, labels = c("A", "B", "C", "D"))
# shuffle the factor
veg_type <- veg_type[sample(1:200, size = 200, replace = FALSE)]
plot <- gl(n = 8, k = 25, labels = paste("A", 1:8, sep = ""))
modmat <- model.matrix(~i + plot + temp + veg_type - 1, data.frame(i = rep(1, 
    200), plot = plot, temp = temp, veg_type = veg_type))

# the plot effect
intercept.eff <- rnorm(8, mean = 3, sd = 2)
# put together all the effects
eff <- c(8, intercept.eff, 3, 0.3, 1.5, -4)

# compute the repsonse variable, the plant biomass
mus <- modmat %*% eff
y <- rnorm(200, mean = mus, sd = 1)
# put all in one data frame
data <- data.frame(y = y, temp = temp, veg_type = veg_type, plot = plot)

# the null model with only the plot term
mod0 <- lmer(y ~ 1 + (1 | plot), data)
# the full model
mod1 <- lmer(y ~ temp + veg_type + (1 | plot), data)
# model summary
summary(mod0)

## Linear mixed model fit by REML ['lmerMod']
## Formula: y ~ 1 + (1 | plot) 
##    Data: data 
## 
## REML criterion at convergence: 1443 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  plot     (Intercept)  5.34    2.31    
##  Residual             77.78    8.82    
## Number of obs: 200, groups: plot, 8
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)    24.19       1.03    23.5

summary(mod1)

## Linear mixed model fit by REML ['lmerMod']
## Formula: y ~ temp + veg_type + (1 | plot) 
##    Data: data 
## 
## REML criterion at convergence: 586.1 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  plot     (Intercept) 4.402    2.10    
##  Residual             0.884    0.94    
## Number of obs: 200, groups: plot, 8
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  10.3506     0.7608    13.6
## temp          2.9979     0.0242   123.7
## veg_typeB     0.2295     0.1913     1.2
## veg_typeC     1.6173     0.1911     8.5
## veg_typeD    -4.0277     0.1938   -20.8
## 
## Correlation of Fixed Effects:
##           (Intr) temp   vg_tyB vg_tyC
## temp      -0.135                     
## veg_typeB -0.106 -0.133              
## veg_typeC -0.112 -0.089  0.496       
## veg_typeD -0.116 -0.066  0.505  0.493


# compute the marginal R-square compute the variance in the fitted values
VarF <- var(as.vector(fixef(mod1) %*% t(mod1@pp$X)))
# VarCorr() extracts variance components attr(VarCorr(lmer.model),'sc')^2
# extracts the residual variance, VarCorr()$plot extract the variance of the
# plot effect
VarF/(VarF + VarCorr(mod1)$plot[1] + attr(VarCorr(mod1), "sc")^2)

## [1] 0.9351


# compute the conditionnal R-square
(VarF + VarCorr(mod1)$plot[1])/(VarF + VarCorr(mod1)$plot[1] + (attr(VarCorr(mod1), 
    "sc")^2))

## [1] 0.9891


# AIC and BIC needs to be calcualted with ML not REML in body size models
mod0ML <- lmer(y ~ 1 + (1 | plot), data, REML = FALSE)
mod1ML <- lmer(y ~ temp + veg_type + (1 | plot), data, REML = FALSE)

# View model fits for both models fitted by ML
summary(mod0ML)

## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: y ~ 1 + (1 | plot) 
##    Data: data 
## 
##      AIC      BIC   logLik deviance 
##   1451.3   1461.2   -722.6   1445.3 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  plot     (Intercept)  4.29    2.07    
##  Residual             77.78    8.82    
## Number of obs: 200, groups: plot, 8
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   24.187      0.962    25.1

summary(mod1ML)

## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: y ~ temp + veg_type + (1 | plot) 
##    Data: data 
## 
##      AIC      BIC   logLik deviance 
##    590.6    613.6   -288.3    576.6 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  plot     (Intercept) 3.847    1.962   
##  Residual             0.866    0.931   
## Number of obs: 200, groups: plot, 8
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   10.351      0.713    14.5
## temp           2.998      0.024   125.0
## veg_typeB      0.230      0.189     1.2
## veg_typeC      1.618      0.189     8.6
## veg_typeD     -4.028      0.192   -21.0
## 
## Correlation of Fixed Effects:
##           (Intr) temp   vg_tyB vg_tyC
## temp      -0.142                     
## veg_typeB -0.112 -0.133              
## veg_typeC -0.118 -0.089  0.496       
## veg_typeD -0.123 -0.066  0.505  0.493


# computing the percent of explained variance for the plot slope
1 - (VarCorr(mod1)$plot[1]^2/VarCorr(mod0)$plot[1]^2)

## [1] 0.3211

# for the residuals
1 - (var(residuals(mod1))/var(residuals(mod0)))

## [1] 0.989


# simulating binomial response data
plot.eff <- rnorm(8, 0, 2)
all.eff <- c(-0.3, plot.eff, 0.02, 0.5, -0.8, 0.8)
ps <- invlogit(modmat %*% all.eff)
ys <- rbinom(200, 1, ps)

data <- data.frame(y = ys, temp = temp, veg_type = veg_type, plot = plot)

mod0 <- glmer(y ~ 1 + (1 | plot), data, family = "binomial")
mod1 <- glmer(y ~ temp + veg_type + (1 | plot), data, family = "binomial")

VarF <- var(as.vector(fixef(mod1) %*% t(mod1@pp$X)))
# VarCorr() extracts variance components attr(VarCorr(lmer.model),'sc')^2
# extracts the residual variance, VarCorr()$plot extract the variance of the
# plot effect
VarF/(VarF + VarCorr(mod1)$plot[1] + attr(VarCorr(mod1), "sc")^2 + (pi^2)/3)

## [1] 0.06965


# compute the conditionnal R-square
(VarF + VarCorr(mod1)$plot[1])/(VarF + VarCorr(mod1)$plot[1] + (attr(VarCorr(mod1), 
    "sc")^2) + (pi^2)/3)

## [1] 0.2489



# computing the percent of explained variance for the plot slope
1 - (VarCorr(mod1)$plot[1]^2/VarCorr(mod0)$plot[1]^2)

## [1] -0.02508

# for the residuals
1 - (var(residuals(mod1))/var(residuals(mod0)))

## [1] 0.06206



# poisson data
plot.eff <- rnorm(8, 0, 2)
all.eff <- c(-0.3, plot.eff, 0.2, 0.5, -0.8, 0.8)
lambdas <- exp(modmat %*% all.eff)
ys <- rpois(200, lambdas)

data <- data.frame(y = ys, temp = temp, veg_type = veg_type, plot = plot)

mod0 <- glmer(y ~ 1 + (1 | plot), data, family = "poisson")
mod1 <- glmer(y ~ temp + veg_type + (1 | plot), data, family = "poisson")

VarF <- var(as.vector(fixef(mod1) %*% t(mod1@pp$X)))
# VarCorr() extracts variance components attr(VarCorr(lmer.model),'sc')^2
# extracts the residual variance, VarCorr()$plot extract the variance of the
# plot effect
VarF/(VarF + VarCorr(mod1)$plot[1] + attr(VarCorr(mod1), "sc")^2 + log(1 + 1/exp(as.numeric(fixef(mod0)))))

## [1] 0.223


# compute the conditionnal R-square
(VarF + VarCorr(mod1)$plot[1])/(VarF + VarCorr(mod1)$plot[1] + (attr(VarCorr(mod1), 
    "sc")^2) + log(1 + 1/exp(as.numeric(fixef(mod0)))))

## [1] 0.5487


# computing the percent of explained variance for the plot slope
1 - (VarCorr(mod1)$plot[1]^2/VarCorr(mod0)$plot[1]^2)

## [1] 0.2399

# for the residuals
1 - (var(residuals(mod1))/var(residuals(mod0)))

## [1] 0.6838