library(readr)
dataset <- read_csv("correlated-random-effects.csv")
dataset$region <- factor(dataset$region)
dataset$country <- factor(dataset$country)
head(dataset)
## y x1 x2 country wave region
## 1 0 0.3299067 0.1486429 12 1995 5
## 2 0 0.3299067 0.1486429 12 1995 5
## 3 0 0.3299067 0.1486429 12 1995 5
## 4 1 0.3299067 0.1486429 12 1995 5
## 5 1 0.3299067 0.1486429 12 1995 5
## 6 0 0.3299067 0.1486429 12 1995 5
summary(dataset)
## y x1 x2 country
## Min. :0.0000 Min. :-1.5102 Min. :-0.39999 65 : 8923
## 1st Qu.:0.0000 1st Qu.:-0.0717 1st Qu.:-0.21021 74 : 6654
## Median :0.0000 Median : 0.1140 Median :-0.05628 17 : 6054
## Mean :0.3823 Mean : 0.0450 Mean : 0.03466 22 : 6051
## 3rd Qu.:1.0000 3rd Qu.: 0.2647 3rd Qu.: 0.26592 33 : 6043
## Max. :1.0000 Max. : 0.5757 Max. : 0.97725 47 : 5459
## NA's :3009 (Other):176395
## wave region
## Min. :1995 1:36645
## 1st Qu.:1995 2:33008
## Median :2000 3:33999
## Mean :2000 4:38303
## 3rd Qu.:2005 5:48399
## Max. :2005 6:25225
##
This makes the data more compact and easier to analyse.
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
dataset <- dataset %>%
group_by(region, country, wave) %>%
summarise(
Present = sum(y == 1),
N = n(),
x1 = mean(x1, na.rm = TRUE),
x2 = mean(x2)
)
dataset$countrywave <- interaction(dataset$country, dataset$wave)
dataset$regionid <- as.integer(dataset$region)
dataset$regionid1 <- dataset$regionid + max(dataset$regionid)
dataset$regionid2 <- dataset$regionid1 + max(dataset$regionid)
dataset$cwave <- dataset$wave - min(dataset$wave)
n.region <- n_distinct(dataset$region)
dataset
## Source: local data frame [146 x 12]
## Groups: region, country
##
## region country wave Present N x1 x2 countrywave
## 1 1 8 1995 195 1525 0.01159922 0.2170089 8.1995
## 2 1 8 2000 211 1500 -0.04300771 0.1059284 8.2000
## 3 1 16 1995 623 1500 0.20323387 -0.1749611 16.1995
## 4 1 16 2000 259 1000 0.04124514 -0.1082685 16.2000
## 5 1 16 2005 777 2015 0.12027511 -0.1289872 16.2005
## 6 1 33 1995 1210 2040 0.19595025 0.3550759 33.1995
## 7 1 33 2000 1096 2002 0.11399926 0.2028553 33.2000
## 8 1 33 2005 1157 2001 0.14182371 0.2540212 33.2005
## 9 1 34 2000 217 1004 -0.02916449 0.9540538 34.2000
## 10 1 34 2005 527 2015 0.11945258 0.1988742 34.2005
## .. ... ... ... ... ... ... ... ...
## Variables not shown: regionid (int), regionid1 (int), regionid2 (int),
## cwave (int)
summary(dataset)
## region country wave Present N
## 1:26 4 : 3 Min. :1995 Min. : 45.0 Min. : 417
## 2:24 15 : 3 1st Qu.:1995 1st Qu.: 320.5 1st Qu.:1038
## 3:16 16 : 3 Median :2000 Median : 475.0 Median :1211
## 4:30 33 : 3 Mean :2000 Mean : 564.5 Mean :1477
## 5:36 39 : 3 3rd Qu.:2005 3rd Qu.: 702.2 3rd Qu.:1874
## 6:14 47 : 3 Max. :2005 Max. :2369.0 Max. :3401
## (Other):128
## x1 x2 countrywave regionid
## Min. :-1.5102302 Min. :-0.399994 1.1995 : 1 Min. :1.000
## 1st Qu.:-0.1579904 1st Qu.:-0.212392 4.1995 : 1 1st Qu.:2.000
## Median : 0.0728662 Median :-0.120049 5.1995 : 1 Median :4.000
## Mean : 0.0009505 Mean : 0.002102 6.1995 : 1 Mean :3.466
## 3rd Qu.: 0.2099507 3rd Qu.: 0.197752 7.1995 : 1 3rd Qu.:5.000
## Max. : 0.5757258 Max. : 0.977251 8.1995 : 1 Max. :6.000
## NA's :3 (Other):140
## regionid1 regionid2 cwave
## Min. : 7.000 Min. :13.00 Min. : 0.000
## 1st Qu.: 8.000 1st Qu.:14.00 1st Qu.: 0.000
## Median :10.000 Median :16.00 Median : 5.000
## Mean : 9.466 Mean :15.47 Mean : 5.068
## 3rd Qu.:11.000 3rd Qu.:17.00 3rd Qu.:10.000
## Max. :12.000 Max. :18.00 Max. :10.000
##
lme4
library(lme4)
## Loading required package: Matrix
## Loading required package: Rcpp
system.time(
model <- glmer(
cbind(Present, N - Present) ~
x1 + x2 + (1 + x1 + x2 | region) + (1 | country) + (1 | countrywave),
data = dataset,
family = binomial
)
)
## user system elapsed
## 1.43 0.03 1.46
model
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: cbind(Present, N - Present) ~ x1 + x2 + (1 + x1 + x2 | region) +
## (1 | country) + (1 | countrywave)
## Data: dataset
## AIC BIC logLik deviance df.resid
## 1907.0354 1939.6267 -942.5177 1885.0354 132
## Random effects:
## Groups Name Std.Dev. Corr
## countrywave (Intercept) 0.3989
## country (Intercept) 0.5740
## region (Intercept) 0.1283
## x1 0.1537 1.00
## x2 0.3741 -1.00 -1.00
## Number of obs: 143, groups: countrywave, 143; country, 82; region, 6
## Fixed Effects:
## (Intercept) x1 x2
## -0.60707 0.48900 0.04428
This gives perfectly correlated random effects because there is not enough information in the dataset to estimate random slopes at the region level.
INLA
library(INLA)
## Loading required package: sp
## Loading required package: splines
system.time(
m1 <- inla(
Present ~ x1 + x2 +
# INLA equivalent of (1 + x1 + x2|region)
f(regionid, model = "iid3d", n = 3 * n.region) +
f(regionid1, x1, copy = "regionid") +
f(regionid2, x2, copy = "regionid") +
# INLA equivalent of (1 + x1 + x2|region)
f(country, model = "iid") +
f(countrywave, model = "iid"),
Ntrials = N,
data = dataset,
family = "binomial",
control.compute = list(dic = TRUE)
)
)
## user system elapsed
## 0.28 0.28 6.27
summary(m1)
##
## Call:
## c("inla(formula = Present ~ x1 + x2 + f(regionid, model = \"iid3d\", ", " n = 3 * n.region) + f(regionid1, x1, copy = \"regionid\") + ", " f(regionid2, x2, copy = \"regionid\") + f(country, model = \"iid\") + ", " f(countrywave, model = \"iid\"), family = \"binomial\", data = dataset, ", " Ntrials = N, control.compute = list(dic = TRUE))")
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.3286 5.3044 0.5158 6.1488
##
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## (Intercept) -0.6029 0.1763 -0.9539 -0.6025 -0.2546 -0.6018 0
## x1 0.3687 0.3281 -0.2738 0.3673 1.0180 0.3641 0
## x2 -0.0331 0.2720 -0.5664 -0.0343 0.5060 -0.0365 0
##
## Random effects:
## Name Model
## regionid IID3D model
## country IID model
## countrywave IID model
## regionid1 Copy
## regionid2 Copy
##
## Model hyperparameters:
## mean sd 0.025quant 0.5quant
## Precision for regionid (component 1) 9.1452 3.9305 3.4600 8.5140
## Precision for regionid (component 2) 7.9376 3.8760 2.6467 7.2175
## Precision for regionid (component 3) 7.6234 3.6925 2.5969 6.9320
## Rho1:2 for regionid 0.1024 0.2468 -0.3540 0.0941
## Rho1:3 for regionid -0.0299 0.2502 -0.4837 -0.0420
## Rho2:3 for regionid -0.1192 0.2655 -0.6309 -0.1142
## Precision for countrywave 6.4745 1.3460 4.1531 6.3722
## Precision for country 3.1922 0.8239 1.9314 3.0701
## 0.975quant mode
## Precision for regionid (component 1) 18.5860 7.2457
## Precision for regionid (component 2) 17.4792 5.8134
## Precision for regionid (component 3) 16.7417 5.6161
## Rho1:2 for regionid 0.5888 0.0541
## Rho1:3 for regionid 0.4758 -0.0853
## Rho2:3 for regionid 0.3831 -0.0821
## Precision for countrywave 9.4186 6.1936
## Precision for country 5.1355 2.8338
##
## Expected number of effective parameters(std dev): 144.14(0.2934)
## Number of equivalent replicates : 1.013
##
## Deviance Information Criterion: 1382.28
## Effective number of parameters: 144.15
##
## Marginal Likelihood: -1333.98
## Posterior marginals for linear predictor and fitted values computed
plot(
m1$marginals.hyperpar$`Rho1:2 for regionid`,
type = "l",
main = "Correlation intercept : x1"
)
plot(
m1$marginals.hyperpar$`Rho1:3 for regionid`,
type = "l",
main = "Correlation intercept : x2"
)
plot(
m1$marginals.hyperpar$`Rho2:3 for regionid`,
type = "l",
main = "Correlation x1 : x2"
)
plot(m1)
Note that the correlations are now around zero with rather wide credible intervals.
plot(m1)
doesn’t work within a Markdown file. It works fine in an interactive session.
INLA
is a bit slower than lme4
, but still very fast for a Bayesian technique.
# Remove random effect of region
m2 <- inla(
Present ~ x1 + x2 + f(country, model = "iid") + f(countrywave, model = "iid"),
Ntrials = N,
data = dataset,
family = "binomial",
control.compute = list(dic = TRUE)
)
# Use a random walk along wave per country
m3 <- inla(
Present ~ x1 + x2 + f(country, model = "iid") + f(cwave, replicate = as.integer(country), model = "rw1"),
Ntrials = N,
data = dataset,
family = "binomial",
control.compute = list(dic = TRUE)
)
# Remove random effect of countr:wave
m4 <- inla(
Present ~ x1 + x2 + f(country, model = "iid"),
Ntrials = N,
data = dataset,
family = "binomial",
control.compute = list(dic = TRUE)
)
c(m1$dic$dic, m2$dic$dic, m3$dic$dic, m4$dic$dic)
## [1] 1382.277 1382.260 1382.677 4414.068
Based on the DIC criterion, there is not that much difference between the models with and without the region effects. The effect of wave per country is important.