library(lme4, quietly = TRUE)
library(rstan, quietly = TRUE)
library(rstanarm, quietly = TRUE)
library(brms, quietly = TRUE)
library(broom, quietly = TRUE)
The dataset .data modeled below resembles high-throughput screening (HTS) data.
load(file = "/home/rness/projects/sandbox/lme4_study/data.Rd")
Plates are nested within experiments; the following dataset gives each plate both a nested label (reused across experiments) and a unique label of the form experiment:plate.
.data
## titer experiment nested_plate_label unique_plate_label
## 1 1.0466667 A a A:a
## 2 1.0433333 A a A:a
## 3 1.0016667 A b A:b
## 4 1.0383333 A b A:b
## 5 1.0450000 A c A:c
## 6 1.0516667 A c A:c
## 7 1.0000000 B a B:a
## 8 1.0233333 B a B:a
## 9 0.9583333 B b B:b
## 10 0.9483333 B b B:b
## 11 1.0183333 B c B:c
## 12 0.9816667 B c B:c
## 13 0.9783333 C a C:a
## 14 0.9583333 C a C:a
## 15 1.0650000 C b C:b
## 16 1.0516667 C b C:b
## 17 1.0900000 C c C:c
## 18 1.0616667 C c C:c
## 19 0.9516667 D a D:a
## 20 0.9400000 D a D:a
## 21 0.9483333 D b D:b
## 22 0.9766667 D b D:b
## 23 1.0783333 D c D:c
## 24 1.0750000 D c D:c
## 25 0.9183333 E a E:a
## 26 0.9183333 E a E:a
## 27 0.9116667 E b E:b
## 28 0.9033333 E b E:b
## 29 0.9800000 E c E:c
## 30 0.9583333 E c E:c
## 31 1.0566667 F a F:a
## 32 1.0816667 F a F:a
## 33 0.9883333 F b F:b
## 34 0.9683333 F b F:b
## 35 1.0083333 F c F:c
## 36 1.0000000 F c F:c
## 37 1.0416667 G a G:a
## 38 1.0433333 G a G:a
## 39 1.0166667 G b G:b
## 40 0.9783333 G b G:b
## 41 0.9483333 G c G:c
## 42 0.9616667 G c G:c
## 43 0.9866667 H a H:a
## 44 0.9900000 H a H:a
## 45 1.0866667 H b H:b
## 46 1.1000000 H b H:b
## 47 1.0800000 H c H:c
## 48 1.0683333 H c H:c
## 49 0.9133333 I a I:a
## 50 0.9133333 I a I:a
## 51 1.0666667 I b I:b
## 52 1.0666667 I b I:b
## 53 0.9616667 I c I:c
## 54 0.9466667 I c I:c
## 55 0.9716667 J a J:a
## 56 0.9883333 J a J:a
## 57 0.9866667 J b J:b
## 58 0.9866667 J b J:b
## 59 0.9816667 J c J:c
## 60 0.9433333 J c J:c
levels(.data$nested_plate_label)
## [1] "a" "b" "c"
levels(.data$unique_plate_label)
## [1] "A:a" "A:b" "A:c" "B:a" "B:b" "B:c" "C:a" "C:b" "C:c" "D:a" "D:b"
## [12] "D:c" "E:a" "E:b" "E:c" "F:a" "F:b" "F:c" "G:a" "G:b" "G:c" "H:a"
## [23] "H:b" "H:c" "I:a" "I:b" "I:c" "J:a" "J:b" "J:c"
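As an aside, the unique labels are just the experiment and nested plate labels pasted together. A minimal sketch of how such a column could be built (the comparison assumes .data was constructed this way):
# Build experiment-specific plate labels from the nested labels;
# lex.order = TRUE gives levels in the order "A:a", "A:b", ..., "J:c".
constructed_label <- with(.data, interaction(experiment, nested_plate_label,
                                             sep = ":", lex.order = TRUE))
all.equal(constructed_label, .data$unique_plate_label)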
Below I define formulas for 4 models. These models are all equivalent.
fm1 <- titer ~ (1|experiment) + (1|unique_plate_label)
fm2 <- titer ~ (1|experiment/nested_plate_label)
fm3 <- titer ~ (1|experiment) + (1|experiment:nested_plate_label)
fm4 <- titer ~ (1|experiment/unique_plate_label)
For reference, the following model is different and is not appropriate for this data: it treats nested_plate_label as a factor crossed with experiment rather than nested within it.
fm5 <- titer ~ (1|experiment) + (1|nested_plate_label)
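A quick cross-tabulation makes the problem concrete: every nested label appears in every experiment, so fm5 would assign a single random effect to, say, plate a in experiment A and plate a in experiment B, even though they are physically different plates.
xtabs(~ nested_plate_label + experiment, .data)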
You can see the equivalence of the first four models by comparing the output of the lme4 fits.
results <- data.frame(
  model1 = tidy(lmer(fm1, .data))$estimate,
  model2 = tidy(lmer(fm2, .data))$estimate,
  model3 = tidy(lmer(fm3, .data))$estimate,
  model4 = tidy(lmer(fm4, .data))$estimate
)
rownames(results) <- c("Intercept", "sd_Intercept.plate_label:experiment",
                       "sd_Intercept.experiment", "sd_Observation.Residual")
results
## model1 model2 model3
## Intercept 1.00088889 1.00088889 1.00088889
## sd_Intercept.plate_label:experiment 0.04840129 0.04840129 0.04840129
## sd_Intercept.experiment 0.02145609 0.02145609 0.02145609
## sd_Observation.Residual 0.01372346 0.01372346 0.01372346
## model4
## Intercept 1.00088889
## sd_Intercept.plate_label:experiment 0.04840129
## sd_Intercept.experiment 0.02145609
## sd_Observation.Residual 0.01372346
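Equivalently, the variance components can be read off a single fit with VarCorr; a minimal sketch using fm1 (any of the four fits gives the same numbers):
# Group-level and residual standard deviations from one lme4 fit
as.data.frame(VarCorr(lmer(fm1, .data)))[, c("grp", "sdcor")]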
The brms function make_stancode produces identical Stan code for all four formulas.
sc1 <- make_stancode(fm1, .data)
sc2 <- make_stancode(fm2, .data)
sc3 <- make_stancode(fm3, .data)
sc4 <- make_stancode(fm4, .data)
sc1 == sc2 & sc2 == sc3 & sc3 == sc4
## [1] TRUE
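To see what brms generated, you can print any one of these objects, for example:
cat(sc1)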
The following is a rewriting of that Stan code in a more readable form.
model_str <- "
data {
  int<lower=1> N;             // total number of observations
  vector[N] titer;            // response variable
  // data for group-level effects of experiment
  int<lower=1> experiment_id[N];
  int<lower=1> N_experiment;
  int<lower=1> M_experiment;  // extraneous, number of coefficients in a group
  // data for group-level effects of plate
  int<lower=1> plate_id[N];
  int<lower=1> N_plate;
  int<lower=1> M_plate;
}
parameters {
  real b_Intercept;
  real<lower=0> sigma;                                            // residual SD
  vector<lower=0>[M_experiment] sd_experiment;                    // group-level standard deviations
  vector[N_experiment] experiment_effect_unscaled[M_experiment];  // unscaled experiment-level effects
  vector<lower=0>[M_plate] sd_plate;                              // group-level standard deviations
  vector[N_plate] plate_effect_unscaled[M_plate];                 // unscaled plate-level effects
}
transformed parameters {
  // non-centered parameterization: scale standard-normal draws by the group-level SDs
  // experiment-level effects
  vector[N_experiment] experiment_effect = sd_experiment[1] * (experiment_effect_unscaled[1]);
  // plate-level effects
  vector[N_plate] plate_effect = sd_plate[1] * (plate_effect_unscaled[1]);
  vector[N] mu = rep_vector(0, N) + b_Intercept;
  for (n in 1:N) {
    mu[n] = mu[n] + (experiment_effect[experiment_id[n]]) + (plate_effect[plate_id[n]]);
  }
}
model {
  // priors including all constants
  //target += student_t_lpdf(sigma | 3, 0, 10) - 1 * student_t_lccdf(0 | 3, 0, 10);
  sigma ~ cauchy(0, 5);
  //target += student_t_lpdf(sd_experiment | 3, 0, 10) - 1 * student_t_lccdf(0 | 3, 0, 10);
  sd_experiment ~ cauchy(0, 5);
  experiment_effect_unscaled[1] ~ normal(0, 1);
  //target += student_t_lpdf(sd_plate | 3, 0, 10) - 1 * student_t_lccdf(0 | 3, 0, 10);
  sd_plate ~ cauchy(0, 5);
  plate_effect_unscaled[1] ~ normal(0, 1);
  // likelihood including all constants
  titer ~ normal(mu, sigma);
}
generated quantities {
  vector[N] y_sim;
  for (n in 1:N) {
    y_sim[n] = normal_rng(mu[n], sigma);
  }
}
"
model <- stan_model(model_code = model_str)
.dat <- list(
  N = nrow(.data),
  titer = .data$titer,
  experiment_id = as.integer(.data$experiment),
  N_experiment = length(levels(.data$experiment)),
  M_experiment = 1,
  plate_id = as.integer(.data$unique_plate_label),
  N_plate = length(levels(.data$unique_plate_label)),
  M_plate = 1
)
stan_fit <- sampling(model, .dat)
##
## SAMPLING FOR MODEL 'f29b87432c5de46ed581af57e4151034' NOW (CHAIN 1).
##
## Gradient evaluation took 1.9e-05 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0.19 seconds.
## Adjust your expectations accordingly!
##
##
## Iteration: 1 / 2000 [ 0%] (Warmup)
## Iteration: 200 / 2000 [ 10%] (Warmup)
## Iteration: 400 / 2000 [ 20%] (Warmup)
## Iteration: 600 / 2000 [ 30%] (Warmup)
## Iteration: 800 / 2000 [ 40%] (Warmup)
## Iteration: 1000 / 2000 [ 50%] (Warmup)
## Iteration: 1001 / 2000 [ 50%] (Sampling)
## Iteration: 1200 / 2000 [ 60%] (Sampling)
## Iteration: 1400 / 2000 [ 70%] (Sampling)
## Iteration: 1600 / 2000 [ 80%] (Sampling)
## Iteration: 1800 / 2000 [ 90%] (Sampling)
## Iteration: 2000 / 2000 [100%] (Sampling)
##
## Elapsed Time: 1.00007 seconds (Warm-up)
## 0.490211 seconds (Sampling)
## 1.49028 seconds (Total)
##
##
## SAMPLING FOR MODEL 'f29b87432c5de46ed581af57e4151034' NOW (CHAIN 2).
##
## Gradient evaluation took 1.3e-05 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0.13 seconds.
## Adjust your expectations accordingly!
##
##
## Iteration: 1 / 2000 [ 0%] (Warmup)
## Iteration: 200 / 2000 [ 10%] (Warmup)
## Iteration: 400 / 2000 [ 20%] (Warmup)
## Iteration: 600 / 2000 [ 30%] (Warmup)
## Iteration: 800 / 2000 [ 40%] (Warmup)
## Iteration: 1000 / 2000 [ 50%] (Warmup)
## Iteration: 1001 / 2000 [ 50%] (Sampling)
## Iteration: 1200 / 2000 [ 60%] (Sampling)
## Iteration: 1400 / 2000 [ 70%] (Sampling)
## Iteration: 1600 / 2000 [ 80%] (Sampling)
## Iteration: 1800 / 2000 [ 90%] (Sampling)
## Iteration: 2000 / 2000 [100%] (Sampling)
##
## Elapsed Time: 1.00096 seconds (Warm-up)
## 0.37874 seconds (Sampling)
## 1.3797 seconds (Total)
##
##
## SAMPLING FOR MODEL 'f29b87432c5de46ed581af57e4151034' NOW (CHAIN 3).
##
## Gradient evaluation took 2.8e-05 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0.28 seconds.
## Adjust your expectations accordingly!
##
##
## Iteration: 1 / 2000 [ 0%] (Warmup)
## Iteration: 200 / 2000 [ 10%] (Warmup)
## Iteration: 400 / 2000 [ 20%] (Warmup)
## Iteration: 600 / 2000 [ 30%] (Warmup)
## Iteration: 800 / 2000 [ 40%] (Warmup)
## Iteration: 1000 / 2000 [ 50%] (Warmup)
## Iteration: 1001 / 2000 [ 50%] (Sampling)
## Iteration: 1200 / 2000 [ 60%] (Sampling)
## Iteration: 1400 / 2000 [ 70%] (Sampling)
## Iteration: 1600 / 2000 [ 80%] (Sampling)
## Iteration: 1800 / 2000 [ 90%] (Sampling)
## Iteration: 2000 / 2000 [100%] (Sampling)
##
## Elapsed Time: 1.03446 seconds (Warm-up)
## 0.437107 seconds (Sampling)
## 1.47157 seconds (Total)
##
##
## SAMPLING FOR MODEL 'f29b87432c5de46ed581af57e4151034' NOW (CHAIN 4).
##
## Gradient evaluation took 2.8e-05 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0.28 seconds.
## Adjust your expectations accordingly!
##
##
## Iteration: 1 / 2000 [ 0%] (Warmup)
## Iteration: 200 / 2000 [ 10%] (Warmup)
## Iteration: 400 / 2000 [ 20%] (Warmup)
## Iteration: 600 / 2000 [ 30%] (Warmup)
## Iteration: 800 / 2000 [ 40%] (Warmup)
## Iteration: 1000 / 2000 [ 50%] (Warmup)
## Iteration: 1001 / 2000 [ 50%] (Sampling)
## Iteration: 1200 / 2000 [ 60%] (Sampling)
## Iteration: 1400 / 2000 [ 70%] (Sampling)
## Iteration: 1600 / 2000 [ 80%] (Sampling)
## Iteration: 1800 / 2000 [ 90%] (Sampling)
## Iteration: 2000 / 2000 [100%] (Sampling)
##
## Elapsed Time: 0.981262 seconds (Warm-up)
## 0.620169 seconds (Sampling)
## 1.60143 seconds (Total)
## Warning: There were 211 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Warning: Examine the pairs() plot to diagnose sampling problems
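As the warning suggests, the divergent transitions can usually be reduced by raising adapt_delta above its default of 0.8; a sketch (the refit is not used below, and the object name is mine):
# Refit with a higher target acceptance rate: smaller step sizes, fewer divergences
stan_fit_refit <- sampling(model, .dat, control = list(adapt_delta = 0.99))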
intercept <- mean(extract(stan_fit, "b_Intercept")[[1]])
sd_experiment <- mean(extract(stan_fit, "sd_experiment")[[1]][, 1])
sd_plate <- mean(extract(stan_fit, "sd_plate")[[1]][, 1])
sd_error <- mean(extract(stan_fit, "sigma")[[1]])
results$stan <- c(intercept, sd_plate, sd_experiment, sd_error)
results
## model1 model2 model3
## Intercept 1.00088889 1.00088889 1.00088889
## sd_Intercept.plate_label:experiment 0.04840129 0.04840129 0.04840129
## sd_Intercept.experiment 0.02145609 0.02145609 0.02145609
## sd_Observation.Residual 0.01372346 0.01372346 0.01372346
## model4 stan
## Intercept 1.00088889 1.00163555
## sd_Intercept.plate_label:experiment 0.04840129 0.05167856
## sd_Intercept.experiment 0.02145609 0.02388808
## sd_Observation.Residual 0.01372346 0.01442297
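The y_sim draws from the generated quantities block also allow a quick posterior predictive check; a sketch (the object names here are mine):
# Compare each observed titer to the central 90% interval of its simulated replicates
y_sim <- extract(stan_fit, "y_sim")[[1]]   # iterations x N matrix
ppc <- data.frame(
  observed  = .data$titer,
  sim_lower = apply(y_sim, 2, quantile, probs = 0.05),
  sim_upper = apply(y_sim, 2, quantile, probs = 0.95)
)
mean(with(ppc, observed >= sim_lower & observed <= sim_upper))   # coverage of the observed values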