Read the data

library(readr)
dataset <- read_csv("correlated-random-effects.csv")
dataset$region <- factor(dataset$region)
dataset$country <- factor(dataset$country)
head(dataset)

##   y        x1        x2 country wave region
## 1 0 0.3299067 0.1486429      12 1995      5
## 2 0 0.3299067 0.1486429      12 1995      5
## 3 0 0.3299067 0.1486429      12 1995      5
## 4 1 0.3299067 0.1486429      12 1995      5
## 5 1 0.3299067 0.1486429      12 1995      5
## 6 0 0.3299067 0.1486429      12 1995      5

summary(dataset)

##        y                x1                x2              country      
##  Min.   :0.0000   Min.   :-1.5102   Min.   :-0.39999   65     :  8923  
##  1st Qu.:0.0000   1st Qu.:-0.0717   1st Qu.:-0.21021   74     :  6654  
##  Median :0.0000   Median : 0.1140   Median :-0.05628   17     :  6054  
##  Mean   :0.3823   Mean   : 0.0450   Mean   : 0.03466   22     :  6051  
##  3rd Qu.:1.0000   3rd Qu.: 0.2647   3rd Qu.: 0.26592   33     :  6043  
##  Max.   :1.0000   Max.   : 0.5757   Max.   : 0.97725   47     :  5459  
##                   NA's   :3009                         (Other):176395  
##       wave      region   
##  Min.   :1995   1:36645  
##  1st Qu.:1995   2:33008  
##  Median :2000   3:33999  
##  Mean   :2000   4:38303  
##  3rd Qu.:2005   5:48399  
##  Max.   :2005   6:25225  
##

Aggregate binary data

This makes the data more compact and easier to analyse.

library(dplyr)

## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

dataset <- dataset %>% 
  group_by(region, country, wave) %>% 
  summarise(
    Present = sum(y == 1), 
    N = n(), 
    x1 = mean(x1, na.rm = TRUE), 
    x2 = mean(x2)
  )
dataset$countrywave <- interaction(dataset$country, dataset$wave)
dataset$regionid <- as.integer(dataset$region)
dataset$regionid1 <- dataset$regionid + max(dataset$regionid)
dataset$regionid2 <- dataset$regionid1 + max(dataset$regionid)
dataset$cwave <- dataset$wave - min(dataset$wave)
n.region <- n_distinct(dataset$region)
dataset

## Source: local data frame [146 x 12]
## Groups: region, country
## 
##    region country wave Present    N          x1         x2 countrywave
## 1       1       8 1995     195 1525  0.01159922  0.2170089      8.1995
## 2       1       8 2000     211 1500 -0.04300771  0.1059284      8.2000
## 3       1      16 1995     623 1500  0.20323387 -0.1749611     16.1995
## 4       1      16 2000     259 1000  0.04124514 -0.1082685     16.2000
## 5       1      16 2005     777 2015  0.12027511 -0.1289872     16.2005
## 6       1      33 1995    1210 2040  0.19595025  0.3550759     33.1995
## 7       1      33 2000    1096 2002  0.11399926  0.2028553     33.2000
## 8       1      33 2005    1157 2001  0.14182371  0.2540212     33.2005
## 9       1      34 2000     217 1004 -0.02916449  0.9540538     34.2000
## 10      1      34 2005     527 2015  0.11945258  0.1988742     34.2005
## ..    ...     ...  ...     ...  ...         ...        ...         ...
## Variables not shown: regionid (int), regionid1 (int), regionid2 (int),
##   cwave (int)

summary(dataset)

##  region    country         wave         Present             N       
##  1:26   4      :  3   Min.   :1995   Min.   :  45.0   Min.   : 417  
##  2:24   15     :  3   1st Qu.:1995   1st Qu.: 320.5   1st Qu.:1038  
##  3:16   16     :  3   Median :2000   Median : 475.0   Median :1211  
##  4:30   33     :  3   Mean   :2000   Mean   : 564.5   Mean   :1477  
##  5:36   39     :  3   3rd Qu.:2005   3rd Qu.: 702.2   3rd Qu.:1874  
##  6:14   47     :  3   Max.   :2005   Max.   :2369.0   Max.   :3401  
##         (Other):128                                                 
##        x1                   x2             countrywave     regionid    
##  Min.   :-1.5102302   Min.   :-0.399994   1.1995 :  1   Min.   :1.000  
##  1st Qu.:-0.1579904   1st Qu.:-0.212392   4.1995 :  1   1st Qu.:2.000  
##  Median : 0.0728662   Median :-0.120049   5.1995 :  1   Median :4.000  
##  Mean   : 0.0009505   Mean   : 0.002102   6.1995 :  1   Mean   :3.466  
##  3rd Qu.: 0.2099507   3rd Qu.: 0.197752   7.1995 :  1   3rd Qu.:5.000  
##  Max.   : 0.5757258   Max.   : 0.977251   8.1995 :  1   Max.   :6.000  
##  NA's   :3                                (Other):140                  
##    regionid1        regionid2         cwave       
##  Min.   : 7.000   Min.   :13.00   Min.   : 0.000  
##  1st Qu.: 8.000   1st Qu.:14.00   1st Qu.: 0.000  
##  Median :10.000   Median :16.00   Median : 5.000  
##  Mean   : 9.466   Mean   :15.47   Mean   : 5.068  
##  3rd Qu.:11.000   3rd Qu.:17.00   3rd Qu.:10.000  
##  Max.   :12.000   Max.   :18.00   Max.   :10.000  
##

Analyse the model with `lme4`

library(lme4)

## Loading required package: Matrix
## Loading required package: Rcpp

system.time(
  model <- glmer(
    cbind(Present, N - Present) ~ 
      x1 + x2 + (1 + x1 + x2 | region) + (1 | country) + (1 | countrywave), 
    data = dataset, 
    family = binomial
  )
)

##    user  system elapsed 
##    1.43    0.03    1.46

model

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: cbind(Present, N - Present) ~ x1 + x2 + (1 + x1 + x2 | region) +  
##     (1 | country) + (1 | countrywave)
##    Data: dataset
##       AIC       BIC    logLik  deviance  df.resid 
## 1907.0354 1939.6267 -942.5177 1885.0354       132 
## Random effects:
##  Groups      Name        Std.Dev. Corr       
##  countrywave (Intercept) 0.3989              
##  country     (Intercept) 0.5740              
##  region      (Intercept) 0.1283              
##              x1          0.1537    1.00      
##              x2          0.3741   -1.00 -1.00
## Number of obs: 143, groups:  countrywave, 143; country, 82; region, 6
## Fixed Effects:
## (Intercept)           x1           x2  
##    -0.60707      0.48900      0.04428

This gives perfectly correlated random effects because there is not enough information in the dataset to estimate random slopes at the region level.

Analyse the model with `INLA`

library(INLA)

## Loading required package: sp
## Loading required package: splines

system.time(
  m1 <- inla(
    Present ~ x1 + x2 + 
        # INLA equivalent of (1 + x1 + x2|region)
          f(regionid, model = "iid3d", n = 3 * n.region) + 
          f(regionid1, x1, copy = "regionid") + 
          f(regionid2, x2, copy = "regionid") + 
        # INLA equivalent of (1 + x1 + x2|region)
      f(country, model = "iid") + 
      f(countrywave, model = "iid"),
    Ntrials = N,
    data = dataset,
    family = "binomial",
    control.compute = list(dic = TRUE)
  )
)

##    user  system elapsed 
##    0.28    0.28    6.27

summary(m1)

## 
## Call:
## c("inla(formula = Present ~ x1 + x2 + f(regionid, model = \"iid3d\", ",  "    n = 3 * n.region) + f(regionid1, x1, copy = \"regionid\") + ",  "    f(regionid2, x2, copy = \"regionid\") + f(country, model = \"iid\") + ",  "    f(countrywave, model = \"iid\"), family = \"binomial\", data = dataset, ",  "    Ntrials = N, control.compute = list(dic = TRUE))")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          0.3286          5.3044          0.5158          6.1488 
## 
## Fixed effects:
##                mean     sd 0.025quant 0.5quant 0.975quant    mode kld
## (Intercept) -0.6029 0.1763    -0.9539  -0.6025    -0.2546 -0.6018   0
## x1           0.3687 0.3281    -0.2738   0.3673     1.0180  0.3641   0
## x2          -0.0331 0.2720    -0.5664  -0.0343     0.5060 -0.0365   0
## 
## Random effects:
## Name   Model
##  regionid   IID3D model 
## country   IID model 
## countrywave   IID model 
## regionid1   Copy 
## regionid2   Copy 
## 
## Model hyperparameters:
##                                         mean     sd 0.025quant 0.5quant
## Precision for regionid (component 1)  9.1452 3.9305     3.4600   8.5140
## Precision for regionid (component 2)  7.9376 3.8760     2.6467   7.2175
## Precision for regionid (component 3)  7.6234 3.6925     2.5969   6.9320
## Rho1:2 for regionid                   0.1024 0.2468    -0.3540   0.0941
## Rho1:3 for regionid                  -0.0299 0.2502    -0.4837  -0.0420
## Rho2:3 for regionid                  -0.1192 0.2655    -0.6309  -0.1142
## Precision for countrywave             6.4745 1.3460     4.1531   6.3722
## Precision for country                 3.1922 0.8239     1.9314   3.0701
##                                      0.975quant    mode
## Precision for regionid (component 1)    18.5860  7.2457
## Precision for regionid (component 2)    17.4792  5.8134
## Precision for regionid (component 3)    16.7417  5.6161
## Rho1:2 for regionid                      0.5888  0.0541
## Rho1:3 for regionid                      0.4758 -0.0853
## Rho2:3 for regionid                      0.3831 -0.0821
## Precision for countrywave                9.4186  6.1936
## Precision for country                    5.1355  2.8338
## 
## Expected number of effective parameters(std dev): 144.14(0.2934)
## Number of equivalent replicates : 1.013 
## 
## Deviance Information Criterion: 1382.28
## Effective number of parameters: 144.15
## 
## Marginal Likelihood:  -1333.98 
## Posterior marginals for linear predictor and fitted values computed

plot(
  m1$marginals.hyperpar$`Rho1:2 for regionid`, 
  type = "l", 
  main = "Correlation intercept : x1"
)

plot(
  m1$marginals.hyperpar$`Rho1:3 for regionid`, 
  type = "l", 
  main = "Correlation intercept : x2"
)

plot(
  m1$marginals.hyperpar$`Rho2:3 for regionid`, 
  type = "l", 
  main = "Correlation x1 : x2"
)

plot(m1)

Note that the correlations are now around zero with rather wide credible intervals.

plot(m1) doesn’t work within a Markdown file. It works fine in an interactive session.

INLA is a bit slower than lme4, but still very fast for a Bayesian technique.

Alternative models

# Remove random effect of region
m2 <- inla(
  Present ~ x1 + x2 + f(country, model = "iid") + f(countrywave, model = "iid"),
  Ntrials = N,
  data = dataset,
  family = "binomial",
  control.compute = list(dic = TRUE)
)
# Use a random walk along wave per country
m3 <- inla(
  Present ~ x1 + x2 + f(country, model = "iid") + f(cwave, replicate = as.integer(country), model = "rw1"),
  Ntrials = N,
  data = dataset,
  family = "binomial",
  control.compute = list(dic = TRUE)
)
# Remove random effect of countr:wave
m4 <- inla(
  Present ~ x1 + x2 + f(country, model = "iid"),
  Ntrials = N,
  data = dataset,
  family = "binomial",
  control.compute = list(dic = TRUE)
)
c(m1$dic$dic, m2$dic$dic, m3$dic$dic, m4$dic$dic)

## [1] 1382.277 1382.260 1382.677 4414.068

Based on the DIC criterion, there is not that much difference between the models with and without the region effects. The effect of wave per country is important.

Random slopes with perfect correlation

Thierry Onkelinx

19 juli 2015

Read the data

Aggregate binary data

Analyse the model with `lme4`

Analyse the model with `INLA`

Alternative models

Random slopes with perfect correlation

Thierry Onkelinx

19 juli 2015

Read the data

Aggregate binary data

Analyse the model with lme4

Analyse the model with INLA

Alternative models

Analyse the model with `lme4`

Analyse the model with `INLA`