Aggregation issue

version: 2012-12-17 15:32:16

set.seed(101)
n <- 10000
d <- data.frame(A = factor(sample(1:30, size = n, replace = TRUE)), B = factor(sample(1:30, 
    size = n, replace = TRUE)))
u1 <- rnorm(30, sd = 2)
u2 <- rnorm(30, sd = 3)
d <- within(d, {
    mu <- plogis(u1[A] + u2[B] + 1)
    n1 <- rbinom(n, size = 20, prob = mu)
    n2 <- 20 - n1
})

Using lme4.0 0.999999-1 from r-forge, which should (?) be identical to CRAN stable (version 0.999999-0):

library(lme4.0)
m1 <- glmer(cbind(n1, n2) ~ 1 + (1 | A) + (1 | B), data = d, family = binomial)

Aggregate (using aggregate or the data.table package would probably be faster, but this is how I'm used to doing it):

library(plyr)
d2 <- ddply(d, c("A", "B"), summarize, n1 = sum(n1), n2 = sum(n2))
nrow(d2)
## [1] 900

Refit (could use update?)

m2 <- glmer(cbind(n1, n2) ~ 1 + (1 | A) + (1 | B), data = d2, family = binomial)

Compare results: not identical but extremely close:

op_old <- options(digits = 12)
cbind(unlist(VarCorr(m1)), unlist(VarCorr(m2)))
##             [,1]           [,2]
## A  4.71354585298  4.71342797413
## B 11.36626376926 11.36629565846
c(fixef(m1), fixef(m2))
##    (Intercept)    (Intercept) 
## 0.806634111027 0.807105111043

The differences in the random effects are very small, but a bit weird: each set of random effects is off by a (very small) constant.

summary(ranef(m1)$A - ranef(m2)$A)
##   (Intercept)            
##  Min.   :0.000137880213  
##  1st Qu.:0.000137914476  
##  Median :0.000137942095  
##  Mean   :0.000137936914  
##  3rd Qu.:0.000137954918  
##  Max.   :0.000138005162
summary(ranef(m1)$B - ranef(m2)$B)
##   (Intercept)            
##  Min.   :0.000324881888  
##  1st Qu.:0.000332971946  
##  Median :0.000333017590  
##  Mean   :0.000332715389  
##  3rd Qu.:0.000333030296  
##  Max.   :0.000333038935