library(lme4)
library(brms)
library(lattice) # diagnostic plots
library(ggplot2) # pretty plots
library(boot)
library(multcomp)
library(sjPlot) # caterpillar plots
# 28 subjects measured on days 1, 3, and 6; treatment is constant within subject
dat <- data.frame(
id = rep(1:28, 3),
day = rep(c(1, 3, 6), each=28),
treatment = rep(c("B", "A", "C", "B", "C", "A", "A", "B", "A", "C", "B", "C",
"A", "A", "B", "A", "C", "B", "C", "A", "A"), each = 4),
outcome = c(6.472687, 7.017110, 6.200715, 6.613928, 6.829968, 7.387583, 7.367293,
8.018853, 7.527408, 6.746739, 7.296910, 6.983360, 6.816621, 6.571689,
5.911261, 6.954988, 7.624122, 7.669865, 7.676225, 7.263593, 7.704737,
7.328716, 7.295610, 5.964180, 6.880814, 6.926342, 6.926342, 7.562293,
6.677607, 7.023526, 6.441864, 7.020875, 7.478931, 7.495336, 7.427709,
7.633020, 7.382091, 7.359731, 7.285889, 7.496863, 6.632403, 6.171196,
6.306012, 7.253833, 7.594852, 6.915225, 7.220147, 7.298227, 7.573612,
7.366550, 7.560513, 7.289078, 7.287802, 7.155336, 7.394452, 7.465383,
6.976048, 7.222966, 6.584153, 7.013223, 7.569905, 7.459185, 7.504068,
7.801867, 7.598728, 7.475841, 7.511873, 7.518384, 6.618589, 5.854754,
6.125749, 6.962720, 7.540600, 7.379861, 7.344189, 7.362815, 7.805802,
7.764172, 7.789844, 7.616437, NA, NA, NA, NA))
dat$treatment <- factor(dat$treatment) # needed for contrasts (characters are no longer auto-factored in R >= 4.0)
contrasts(dat$treatment) <- 'contr.sum'
Using sum contrasts with the ~ 0 + ... formula format gives us the (adjusted) mean for each treatment group. For now, I’m going to ignore the potential interaction between the continuous and categorical predictors (because it’s too much reading).
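For reference, printing the contrast matrix we just assigned should give something like this (contr.sum for a three-level factor):

contrasts(dat$treatment)

##   [,1] [,2]
## A    1    0
## B    0    1
## C   -1   -1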
f1 <- outcome ~ day + treatment + (1|id)
f2 <- outcome ~ 0 + day + treatment + (1|id)
m1 <- lmer(f1, data = dat, REML = FALSE)
m2 <- update(m1, formula = f2)
These models are equivalent, but use slightly different parameterizations.
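A quick sanity check of that equivalence (a sketch: the two parameterizations should give identical log-likelihoods and fitted values, even though the coefficients differ):

# same model, different parameterization
logLik(m1)
logLik(m2)
all.equal(fitted(m1), fitted(m2))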
We can check the distribution of the conditional means (modes, in a more general sense) of the random effects using sjPlot::sjp.lmer. The idea is that if the means are approximately normally distributed (one of the assumptions made by this sort of model), we’re doing ok.
sjp.lmer(m1, type = 're.qq')
They look just fine. This (I think?) can also be a way to get a sense of the utility of a particular random effect term. If all of the conditional means are hanging around zero (and you don’t have a good reason for keeping the term around), this may suggest that there is not enough variability to estimate the random effect variance, and the term could be dropped.
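If you prefer to stay within lme4 and lattice, a similar caterpillar-style plot of the conditional modes (with intervals based on the conditional variances) is available through the dotplot method:

# conditional modes of the random intercepts, with intervals
dotplot(ranef(m1, condVar = TRUE))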
The residuals should also be approximately normally distributed.
qqnorm(resid(m1))
qqline(resid(m1))
A few potentially problematic ones, but nothing systematic.
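As an extra check, the default plot method for merMod objects shows residuals against fitted values, which can flag heteroscedasticity:

plot(m1) # residuals vs. fitted values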
There are (at least) three ways to calculate confidence intervals for each parameter: profiling the likelihood, the Wald approximation, and the parametric bootstrap. In this case, all three give nearly identical answers.
# profile the likelihood
m2prof <- profile(m2, which = 'beta_')
xyplot(m2prof, absVal = TRUE) # straight lines indicate a quadratic likelihood surface (Wald intervals are then reasonable)
c1 <- data.frame(confint(m2prof), label = 'profile')
c1$coef <- rownames(c1)
c2 <- data.frame(confint(m2, method = 'Wald', parm = 'beta_'), label = 'Wald')
c2$coef <- rownames(c2)
c3 <- data.frame(confint(m2, method = 'boot', parm = 'beta_',
boot.type = 'perc', nsim = 1000), label = 'bootstrap')
c3$coef <- rownames(c3)
conf_frame <- rbind(c1, c2, c3)
colnames(conf_frame) <- c('lowci', 'hici', 'type', 'coef_name')
row.names(conf_frame) <- NULL
Shown below are confidence intervals for each parameter and method. There are no substantial differences.
# intervals for the day slope (values below 5)
ggplot(conf_frame[conf_frame$lowci < 5,],
aes(x = coef_name, colour = type)) +
geom_linerange(aes(ymin = lowci, ymax = hici),
size = 2,
position = position_dodge(width = 0.6))
# intervals for the treatment means (values above 5)
ggplot(conf_frame[conf_frame$lowci > 5,],
aes(x = coef_name, colour = type)) +
geom_linerange(aes(ymin = lowci, ymax = hici),
size = 2,
position = position_dodge(width = 0.6))
multcomp is one package for multiple comparisons that is compatible with lme4.
glht(m1, linfct = mcp(treatment = 'Tukey')) # mcp() doesn't like the 0-intercept model, so use m1
##
## General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Linear Hypotheses:
## Estimate
## B - A == 0 -0.75732
## C - A == 0 -0.02278
## C - B == 0 0.73454
# or glht(m1, linfct = mcp(treatment = c('B - A == 0', 'C - A == 0')))...
See ?summary.glht for different adjustments.
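For example, a Bonferroni correction instead of the default single-step adjustment (a sketch reusing the Tukey contrasts from above):

tuk <- glht(m1, linfct = mcp(treatment = 'Tukey'))
summary(tuk, test = adjusted('bonferroni'))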
A slightly more manual procedure is available through bootMer(). This method takes a while (roughly nsim times the original fit time). However, it can be parallelized trivially (set parallel = 'multicore').
bootfun <- function(.) {
  # fixed effects of the 0-intercept model: day slope, then the three treatment means
  fx <- fixef(.)
  bmina <- fx[3] - fx[2] # B - A
  cmina <- fx[4] - fx[2] # C - A
  cminb <- fx[4] - fx[3] # C - B
  c(bmina, cmina, cminb)
}
set.seed(1)
system.time(boo1 <- bootMer(m2, bootfun, nsim = 1000))
## user system elapsed
## 7.99 0.02 8.00
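For reference, the parallelized call mentioned above would look something like this ('multicore' is not available on Windows, where parallel = 'snow' is the option; ncpus is machine-dependent, and boo1_par is just an illustrative name):

# parallel version (results equivalent up to Monte Carlo error)
boo1_par <- bootMer(m2, bootfun, nsim = 1000,
                    parallel = 'multicore', ncpus = 4)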
# example parameter with percentile-based confidence intervals
boot.ci(boo1, index = 1, type = 'perc')
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 1000 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = boo1, type = "perc", index = 1)
##
## Intervals :
## Level Percentile
## 95% (-0.9659, -0.5430 )
## Calculations and Intervals on Original Scale
From here, we can calculate a null distribution (subtract out the mean), and figure out p-values. On top of that, we can get a sense of the effect size.
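For example, a two-sided bootstrap p-value for the B - A contrast (a sketch: center the replicates to approximate the null distribution, then count exceedances, with the usual +1 correction):

est <- boo1$t0[1] # observed B - A estimate
null_dist <- boo1$t[, 1] - mean(boo1$t[, 1]) # centered replicates
(sum(abs(null_dist) >= abs(est)) + 1) / (length(null_dist) + 1)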
brms is a relatively new package that serves as an interface to the Stan language. It can fit a much wider variety of models (nonlinear, some multivariate, …). However, the model must be recompiled to C++ whenever it is changed (which takes a while).
m1brm <- brm(f2, family = 'gaussian', data = dat)
##
## SAMPLING FOR MODEL 'gaussian(identity) brms-model' NOW (CHAIN 1).
##
## Chain 1, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1, Iteration: 2000 / 2000 [100%]  (Sampling)
## #  Elapsed Time: 0.537 seconds (Warm-up)
## #                0.162 seconds (Sampling)
## #                0.699 seconds (Total)
##
## (Chains 2-4 produce analogous output and are omitted here.)
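Pairwise treatment comparisons can then be tested directly on the posterior draws with hypothesis().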
hyp <- hypothesis(m1brm, 'treatmentB - treatmentA = 0')
hyp
## Hypothesis Tests for class b:
## Estimate Est.Error l-95% CI u-95% CI Evid.Ratio
## treatmentB-treatm... = 0 -0.76 0.13 -1.03 -0.51 NA *
## ---
## '*': The expected value under the hypothesis lies outside the 95% CI.
plot(hyp)
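For comparison with the lme4 intervals above, posterior means and 95% credible intervals for the fixed effects are also available directly (the exact column names vary across brms versions):

fixef(m1brm)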