library(rstanarm)
library(dplyr)
library(stringr)
library(glue)

# A glue transformer that adds sprintf-style format specs, e.g.
# str_glue("{x:.3f}", .transformer = sprintf_transformer) prints x to 3 decimals.
sprintf_transformer <- function(text, envir) {
  m <- regexpr(":.+$", text)
  if (m != -1) {
    # split "expr:format" into the expression and the sprintf format spec
    format <- substring(regmatches(text, m), 2)
    regmatches(text, m) <- ""
    res <- eval(parse(text = text, keep.source = FALSE), envir)
    do.call(sprintf, list(glue("%{format}"), res))
  } else {
    eval(parse(text = text, keep.source = FALSE), envir)
  }
}

# Simulate one 2x2 factorial experiment (factors f1 and f2) and fit a logistic regression.
fit_one_glm <- function(baseline_probability,
                        main_effect, interaction_multiplier, N) {
  f1 <- sample(c(TRUE, FALSE), N, replace = TRUE)
  f2 <- sample(c(TRUE, FALSE), N, replace = TRUE)
  # everyone starts at the baseline engagement probability
  y <- rbinom(N, 1, baseline_probability)
  # each factor on its own raises the probability to main_effect
  y[f1] <- rbinom(sum(f1), 1, main_effect)
  (p_current <- sum(y) / length(y))  # observed engagement so far (debugging aid)
  y[f2] <- rbinom(sum(f2), 1, main_effect)
  # both factors together: main_effect scaled by the interaction multiplier
  y[f1 & f2] <- rbinom(sum(f1 & f2), 1, main_effect * interaction_multiplier)
  m1 <- glm(y ~ f1 * f2, family = "binomial")
  m1
}
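A single run can be inspected before looping, just as a sanity check; the parameters below simply reuse the first scenario's values:

fit_once <- fit_one_glm(0.1, 0.2, 2.5, 600)
summary(fit_once)$coefficients  # one row per term: Estimate, std. error, z, Pr(>|z|)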
power_glm <- function(baseline_probability, main_effect, interaction_multiplier,
                      N = 100, nsim = 500) {
  # p-values and coefficient estimates, one row per simulated experiment
  pvals <- data.frame(f1 = numeric(length = nsim),
                      f2 = numeric(length = nsim),
                      int = numeric(length = nsim))
  coefs <- data.frame(f1 = numeric(length = nsim),
                      f2 = numeric(length = nsim),
                      int = numeric(length = nsim),
                      intercept = numeric(length = nsim))
  for (i in 1:nsim) {
    m1 <- fit_one_glm(baseline_probability, main_effect, interaction_multiplier, N)
    s <- coef(summary(m1))
    pvals[i, "f1"]  <- s["f1TRUE", "Pr(>|z|)"]
    pvals[i, "f2"]  <- s["f2TRUE", "Pr(>|z|)"]
    pvals[i, "int"] <- s["f1TRUE:f2TRUE", "Pr(>|z|)"]
    coefs[i, "f1"]  <- s["f1TRUE", "Estimate"]
    coefs[i, "f2"]  <- s["f2TRUE", "Estimate"]
    coefs[i, "int"] <- s["f1TRUE:f2TRUE", "Estimate"]
    coefs[i, "intercept"] <- s["(Intercept)", "Estimate"]
  }
  # label the two halves and stack them into one long data frame
  pvals$what <- "p"
  pvals$intercept <- NA
  coefs$what <- "coef"
  res <- rbind(pvals, coefs)
  return(res)
}
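For repeatable numbers it helps to set a seed first; a minimal usage sketch (the seed and the small nsim are arbitrary choices for illustration):

set.seed(2023)
sim <- power_glm(0.1, 0.2, 2.5, N = 600, nsim = 100)
head(sim)  # long data frame: p-value rows (what == "p") stacked over estimate rows (what == "coef")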
print_sim_results <- function(sim) {
  print("Power")
  # power = share of simulations with p < 0.05, per term
  sim %>% filter(what == "p") %>%
    select(-what) %>%
    summarise(across(.fns = function(x) { sum(x < 0.05) / length(x) })) %>% print()
  # median coefficient estimates across simulations (log-odds scale)
  sim %>% filter(what == "coef") %>% pull(intercept) %>% median() -> intercept
  sim %>% filter(what == "coef") %>% pull(f1) %>% median() -> f1
  sim %>% filter(what == "coef") %>% pull(f2) %>% median() -> f2
  sim %>% filter(what == "coef") %>% pull(int) %>% median() -> interaction
  # note: exp() of a log-odds is an odds, so the "prob" values printed below are
  # really odds and only approximate the probability when it is small
  print(str_glue("Baseline probability {exp(intercept):.3f},
odds ratio f1 {exp(f1):.3f}, prob f1 {exp(intercept+f1):.3f},
odds ratio f2 {exp(f2):.3f}, prob f2 {exp(intercept+f2):.3f},
odds ratio interaction {exp(interaction):.3f}",
             .transformer = sprintf_transformer))
}

Baseline engagement ~10%, each factor on its own leads to twice as much engagement (20%), and both together give ~50% (0.2 * 2.5). N = 600 students. Power for the main effects is 66% :( and for the interaction 28% :((
power_glm(0.1, 0.2, 2.5, 600, 1000) %>% print_sim_results()

[1] "Power"
f1 f2 int intercept
1 0.668 0.684 0.275 NA
Baseline probability 0.110,
odds ratio f1 2.248, prob f1 0.248,
odds ratio f2 2.265, prob f2 0.250,
odds ratio interaction 1.777
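Since those "prob" values are exponentiated log-odds (see the note in print_sim_results()), they can be turned back into proper probabilities with odds / (1 + odds); a quick check on this scenario's numbers:

0.110 / (1 + 0.110)  # baseline ~ 0.099, simulated at 0.10
0.248 / (1 + 0.248)  # f1 group ~ 0.199, simulated at 0.20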
Baseline engagement ~10%, each factor leads to 2.2x as much engagement (22%), and both together give ~55% (0.22 * 2.5). N = 600 students. Power for the main effects is ~80% :) but for the interaction only 26% :(
power_glm(0.1, 0.22, 2.5, 600, 1000) %>% print_sim_results()

[1] "Power"
f1 f2 int intercept
1 0.819 0.804 0.264 NA
Baseline probability 0.110,
odds ratio f1 2.546, prob f1 0.281,
odds ratio f2 2.567, prob f2 0.283,
odds ratio interaction 1.721
Baseline engagement ~5%, each factor leads to twice as much engagement (10%), and both together give ~25% (0.1 * 2.5). N = 600 students.
power_glm(0.05, 0.1, 2.5, 600, 1000) %>% print_sim_results()

[1] "Power"
f1 f2 int intercept
1 0.344 0.344 0.102 NA
Baseline probability 0.052,
odds ratio f1 2.135, prob f1 0.110,
odds ratio f2 2.106, prob f2 0.109,
odds ratio interaction 1.448
Baseline engagement ~5%, each factor leads to twice as much engagement (10%), and both together give ~25% (0.1 * 2.5). N = 1800 students.
power_glm(0.05, 0.1, 2.5, 1800, 1000) %>% print_sim_results()

[1] "Power"
f1 f2 int intercept
1 0.828 0.812 0.198 NA
Baseline probability 0.052,
odds ratio f1 2.119, prob f1 0.110,
odds ratio f2 2.142, prob f2 0.111,
odds ratio interaction 1.406
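The scenarios above were run one call at a time; for a larger grid, something along these lines could work (just an illustrative sketch reusing power_glm() and print_sim_results(), with a small nsim to keep it fast):

scenarios <- expand.grid(baseline = c(0.05, 0.1),
                         main     = c(0.1, 0.2),
                         N        = c(600, 1800))
results <- Map(function(b, m, n) power_glm(b, m, 2.5, N = n, nsim = 100),
               scenarios$baseline, scenarios$main, scenarios$N)
print_sim_results(results[[1]])  # inspect the first scenario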
# Same simulated design as fit_one_glm(), but fit with rstanarm's Bayesian GLM.
fit_one_bayes <- function(baseline_probability, main_effect,
                          interaction_multiplier, N = 100) {
  f1 <- sample(c(TRUE, FALSE), N, replace = TRUE)
  f2 <- sample(c(TRUE, FALSE), N, replace = TRUE)
  y <- rbinom(N, 1, baseline_probability)
  y[f1] <- rbinom(sum(f1), 1, main_effect)
  (p_current <- sum(y) / length(y))  # observed engagement so far (debugging aid)
  y[f2] <- rbinom(sum(f2), 1, main_effect)
  y[f1 & f2] <- rbinom(sum(f1 & f2), 1, main_effect * interaction_multiplier)
  m <- stan_glm(y ~ f1 * f2, family = "binomial")
  m
}

m1 <- fit_one_bayes(0.05, 0.1, 2.5, 1800)

Warning: Omitting the 'data' argument is not recommended and may not be allowed
in future versions of rstanarm. Some post-estimation functions (in particular
'update', 'loo', 'kfold') are not guaranteed to work properly unless 'data' is
specified as a data frame.
SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 1).
Chain 1:
Chain 1: Gradient evaluation took 4.9e-05 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.49 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1:
Chain 1:
Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1:
Chain 1: Elapsed Time: 0.491928 seconds (Warm-up)
Chain 1: 0.520733 seconds (Sampling)
Chain 1: 1.01266 seconds (Total)
Chain 1:
SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 2).
Chain 2:
Chain 2: Gradient evaluation took 4.8e-05 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.48 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2:
Chain 2:
Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2:
Chain 2: Elapsed Time: 0.514085 seconds (Warm-up)
Chain 2: 0.51243 seconds (Sampling)
Chain 2: 1.02652 seconds (Total)
Chain 2:
SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 3).
Chain 3:
Chain 3: Gradient evaluation took 5e-05 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.5 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3:
Chain 3:
Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3:
Chain 3: Elapsed Time: 0.49408 seconds (Warm-up)
Chain 3: 0.505261 seconds (Sampling)
Chain 3: 0.999341 seconds (Total)
Chain 3:
SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 4).
Chain 4:
Chain 4: Gradient evaluation took 4.6e-05 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.46 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4:
Chain 4:
Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4:
Chain 4: Elapsed Time: 0.499183 seconds (Warm-up)
Chain 4: 0.562588 seconds (Sampling)
Chain 4: 1.06177 seconds (Total)
Chain 4:
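The warning above is triggered because fit_one_bayes() calls stan_glm() without a data argument. A minimal sketch of how the call could be rewritten to avoid it (fit_one_bayes_df is just an illustrative name, not what was run above):

fit_one_bayes_df <- function(baseline_probability, main_effect,
                             interaction_multiplier, N = 100) {
  d <- data.frame(f1 = sample(c(TRUE, FALSE), N, replace = TRUE),
                  f2 = sample(c(TRUE, FALSE), N, replace = TRUE))
  d$y <- rbinom(N, 1, baseline_probability)
  d$y[d$f1] <- rbinom(sum(d$f1), 1, main_effect)
  d$y[d$f2] <- rbinom(sum(d$f2), 1, main_effect)
  d$y[d$f1 & d$f2] <- rbinom(sum(d$f1 & d$f2), 1, main_effect * interaction_multiplier)
  # passing data = d keeps update(), loo(), etc. working; refresh = 0 silences the progress log
  stan_glm(y ~ f1 * f2, data = d, family = "binomial", refresh = 0)
}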
library(bayestestR)
library(insight)
library(ggplot2)
posterior_sum <- describe_posterior(m1)

Possible multicollinearity between f2TRUE and f1TRUE (r = 0.72), f1TRUE:f2TRUE and f1TRUE (r = 0.85), f1TRUE:f2TRUE and f2TRUE (r = 0.83). This might lead to inappropriate results. See 'Details' in '?rope'.
# for a nicer table
print_md(posterior_sum, digits = 2)

| Parameter | Median | 95% CI | pd | ROPE | % in ROPE | Rhat | ESS |
|---|---|---|---|---|---|---|---|
| (Intercept) | -3.19 | [-3.69, -2.75] | 100% | [-0.18, 0.18] | 0% | 1.003 | 1205.00 |
| f1TRUE | 1.01 | [ 0.45, 1.59] | 100% | [-0.18, 0.18] | 0% | 1.004 | 1222.00 |
| f2TRUE | 1.15 | [ 0.62, 1.73] | 100% | [-0.18, 0.18] | 0% | 1.005 | 1317.00 |
| f1TRUE:f2TRUE | -0.04 | [-0.70, 0.62] | 54.50% | [-0.18, 0.18] | 42.39% | 1.005 | 1195.00 |
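The medians are on the log-odds scale, so they can be pushed through the inverse logit to connect them back to the simulated probabilities; a small check using the values in the table:

plogis(-3.19)         # baseline ~ 0.04 (simulated at 0.05)
plogis(-3.19 + 1.01)  # f1 only  ~ 0.10 (simulated at 0.10)
plogis(-3.19 + 1.15)  # f2 only  ~ 0.11 (simulated at 0.10)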
posteriors <- get_parameters(m1)

ggplot(posteriors, aes(x = f2TRUE)) +
  geom_density(fill = "orange")

hdi(posteriors$f1TRUE, ci = 0.9)

90% HDI: [0.55, 1.50]
rope_range(m1)

[1] -0.1813799  0.1813799
p_direction(posteriors$f2TRUE)

Probability of Direction: 1.00
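p_direction() here is just the share of posterior draws on the dominant side of zero, which can be checked directly from the draws:

mean(posteriors$f2TRUE > 0)  # ~1.00, matching the value above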