Bayesian Aggregation Simulatons

Author

Ilya Musabirov, Runce Zhang

library(rstanarm)
library(bayestestR)
library(dplyr)
library(insight)

Generating data and setting up simulation for the first study (Week 8 deployment), logistic with default WIP

main_effects = list(Hardness=0, Time=0, MentalContrast=0, GrowthMindset=0, Utility=0, ExtrinsicMotivation=0)

week8_glm <- function(baseline, main_effects, N){
  
  Hardness <- sample(c(T,F), N, replace=TRUE)
  # Hardness = T => solving a hard problem will help you(right now/-) 
  # Hardness = F => solving another problem will help you(right now/-)
  
  Time <- sample(c(T,F), N, replace=TRUE)
  # Time = T => solving (a hard/another) problem will help you right now
  # Time = F => solving (a hard/another) problem will help you
  
  MentalContrast <- sample(c(T,F), N, replace=TRUE) 
  GrowthMindset <- sample(c(T,F), N, replace=TRUE) 
  Utility <- sample(c(T,F), N, replace=TRUE) 
  ExtrinsicMotivation <- sample(c(T,F), N, replace=TRUE) 
  
  y <- rbinom(N, 1, baseline)
  
  y[Hardness==T] <- rbinom(length(y[Hardness==T]), 1, main_effects[["Hardness"]])
  y[Time==T] <- rbinom(length(y[Time==T]), 1, main_effects[["Time"]])
  y[MentalContrast==T] <- rbinom(length(y[MentalContrast==T]), 1, main_effects[["MentalContrast"]])
  y[GrowthMindset==T] <- rbinom(length(y[GrowthMindset==T]), 1, main_effects[["GrowthMindset"]])
  y[Utility==T] <- rbinom(length(y[Utility==T]), 1, main_effects[["Utility"]])
  y[ExtrinsicMotivation==T] <- rbinom(length(y[ExtrinsicMotivation==T]), 1, main_effects[["ExtrinsicMotivation"]])
  
  model <- stan_glm(y ~ Hardness + Time + MentalContrast + GrowthMindset + Utility + ExtrinsicMotivation, family = "binomial") 
  return(model)
}

Testing simulation with lower-ish effects (utility and extrisic motivation are high), 600 students

main_effects = list(Hardness=0.05, Time=0.1, MentalContrast=0.2, GrowthMindset=0.2, Utility=0.3, ExtrinsicMotivation=0.4)

m1 <- week8_glm(0.1, main_effects, 600)

Results (Posterior) for study 1 (weakly informative priors)

describe_posterior(m1)  %>% print_md()
Summary of Posterior Distribution
Parameter Median 95% CI pd ROPE % in ROPE Rhat ESS
(Intercept) -1.22 [-1.70, -0.74] 100% [-0.18, 0.18] 0% 0.999 6597.00
HardnessTRUE -0.20 [-0.55, 0.13] 87.08% [-0.18, 0.18] 45.26% 1.000 6122.00
TimeTRUE 0.10 [-0.23, 0.46] 72.55% [-0.18, 0.18] 65.84% 1.000 5400.00
MentalContrastTRUE -0.14 [-0.49, 0.20] 79.65% [-0.18, 0.18] 58.68% 0.999 6954.00
GrowthMindsetTRUE 0.47 [ 0.13, 0.81] 99.60% [-0.18, 0.18] 2.45% 0.999 6348.00
UtilityTRUE 0.25 [-0.09, 0.59] 92.17% [-0.18, 0.18] 32.76% 0.999 6253.00
ExtrinsicMotivationTRUE 0.50 [ 0.16, 0.84] 99.78% [-0.18, 0.18] 0.74% 0.999 7051.00
posteriors1 <- get_parameters(m1)

posteriors1 %>% 
  summarise(across(.fns=median)) %>% 
  as.numeric() -> posterior1_location

posteriors1 %>% 
  summarise(across(.fns=mad)) %>% 
  as.numeric() -> posterior1_scale

Generating data and setting up simulations for the second study (Week 12 deployment), logistic with informative priors // with default WIP

week12_glm <- function(baseline, main_effect, N, priors=NULL){
  
  Hardness <- sample(c(T,F), N, replace=TRUE) 
  MentalContrast <- sample(c(T,F), N, replace=TRUE) 
  GrowthMindset <- sample(c(T,F), N, replace=TRUE) 
  Utility <- sample(c(T,F), N, replace=TRUE) 
  ExtrinsicMotivation <- sample(c(T,F), N, replace=TRUE) 
  EverydayBitofStudy <- sample(c(T,F), N, replace=TRUE)  # EBS = T => Study everyday = 1
  EverydayExtraProblem <- sample(c(T,F), N, replace=TRUE)  # EEP = T => Study everyday = 2
  
  y <- rbinom(N, 1, baseline)
  
  y[Hardness==T] <- rbinom(length(y[Hardness==T]), 1, main_effects[["Hardness"]])
  y[MentalContrast==T] <- rbinom(length(y[MentalContrast==T]), 1, main_effects[["MentalContrast"]])
  y[GrowthMindset==T] <- rbinom(length(y[GrowthMindset==T]), 1, main_effects[["GrowthMindset"]])
  y[Utility==T] <- rbinom(length(y[Utility==T]), 1, main_effects[["Utility"]])
  y[ExtrinsicMotivation==T] <- rbinom(length(y[ExtrinsicMotivation==T]), 1, main_effects[["ExtrinsicMotivation"]])
    y[EverydayBitofStudy==T] <- rbinom(length(y[EverydayBitofStudy==T]), 1, main_effects[["EverydayBitofStudy"]])
    y[EverydayExtraProblem==T] <- rbinom(length(y[EverydayExtraProblem==T]), 1, main_effects[["EverydayExtraProblem"]])

if(!is.null(priors)){  
    model <- stan_glm(y ~ Hardness + MentalContrast +
                        GrowthMindset + Utility +
                        ExtrinsicMotivation +
                        EverydayBitofStudy +
                        EverydayExtraProblem, 
                      family = "binomial", prior = priors) 
} else{
      model <- stan_glm(y ~ Hardness + MentalContrast +
                        GrowthMindset + Utility +
                        ExtrinsicMotivation +
                        EverydayBitofStudy +
                        EverydayExtraProblem, 
                      family = "binomial")
}
  return(model)
}

Setting effects a bit lower

main_effects = list(Hardness=0.05, Time=0.1, MentalContrast=0.2, GrowthMindset=0.2, Utility=0.3, ExtrinsicMotivation=0.4, EverydayBitofStudy=0.2, EverydayExtraProblem=0.2)

my_prior <- normal(location = c(posterior1_location[c(2,4,5,6,7)], 0, 0), scale = c(posterior1_scale[c(2,4,5,6,7)], 5, 5))

Setting up models with (partially) informative vs weakly informative priors

m2_wip <- week12_glm(0.1, main_effects, 400, priors = NULL)
m2 <- week12_glm(0.1, main_effects, 400, priors = my_prior)

Checking priors

Code
describe_prior(m2_wip) %>% print_md()
Parameter Prior_Distribution Prior_Location Prior_Scale
(Intercept) normal 0 2.50
HardnessTRUE normal 0 5.01
MentalContrastTRUE normal 0 5.01
GrowthMindsetTRUE normal 0 5.00
UtilityTRUE normal 0 4.99
ExtrinsicMotivationTRUE normal 0 5.00
EverydayBitofStudyTRUE normal 0 4.99
EverydayExtraProblemTRUE normal 0 5.00
Code
describe_prior(m2) %>% print_md()
Parameter Prior_Distribution Prior_Location Prior_Scale
(Intercept) normal 0.00 2.50
HardnessTRUE normal -0.20 0.18
MentalContrastTRUE normal -0.14 0.17
GrowthMindsetTRUE normal 0.47 0.18
UtilityTRUE normal 0.25 0.18
ExtrinsicMotivationTRUE normal 0.50 0.17
EverydayBitofStudyTRUE normal 0.00 5.00
EverydayExtraProblemTRUE normal 0.00 5.00

Comparing the model posteriors after Study 2:

Study 1

Code
describe_posterior(m1) %>% print_md()
Summary of Posterior Distribution
Parameter Median 95% CI pd ROPE % in ROPE Rhat ESS
(Intercept) -1.22 [-1.70, -0.74] 100% [-0.18, 0.18] 0% 0.999 6597.00
HardnessTRUE -0.20 [-0.55, 0.13] 87.08% [-0.18, 0.18] 45.26% 1.000 6122.00
TimeTRUE 0.10 [-0.23, 0.46] 72.55% [-0.18, 0.18] 65.84% 1.000 5400.00
MentalContrastTRUE -0.14 [-0.49, 0.20] 79.65% [-0.18, 0.18] 58.68% 0.999 6954.00
GrowthMindsetTRUE 0.47 [ 0.13, 0.81] 99.60% [-0.18, 0.18] 2.45% 0.999 6348.00
UtilityTRUE 0.25 [-0.09, 0.59] 92.17% [-0.18, 0.18] 32.76% 0.999 6253.00
ExtrinsicMotivationTRUE 0.50 [ 0.16, 0.84] 99.78% [-0.18, 0.18] 0.74% 0.999 7051.00

Study 2 w/priors based on Study 1 results

Code
describe_posterior(m2) %>% print_md()
Summary of Posterior Distribution
Parameter Median 95% CI pd ROPE % in ROPE Rhat ESS
(Intercept) -1.24 [-1.74, -0.74] 100% [-0.18, 0.18] 0% 1.000 5350.00
HardnessTRUE -0.10 [-0.40, 0.19] 75.25% [-0.18, 0.18] 71.39% 1.000 5523.00
MentalContrastTRUE -0.24 [-0.51, 0.03] 96.00% [-0.18, 0.18] 33.34% 1.000 5402.00
GrowthMindsetTRUE 0.24 [-0.04, 0.52] 95.50% [-0.18, 0.18] 32.74% 1.000 4824.00
UtilityTRUE 0.23 [-0.05, 0.50] 94.35% [-0.18, 0.18] 37.50% 1.000 4372.00
ExtrinsicMotivationTRUE 0.43 [ 0.15, 0.70] 99.92% [-0.18, 0.18] 1.61% 1.000 4256.00
EverydayBitofStudyTRUE -0.26 [-0.73, 0.20] 86.15% [-0.18, 0.18] 35.89% 1.000 5055.00
EverydayExtraProblemTRUE -0.35 [-0.83, 0.13] 92.17% [-0.18, 0.18] 24.05% 1.000 4853.00

Study 2 w/weakly informative priors

Code
describe_posterior(m2_wip) %>% print_md()
Summary of Posterior Distribution
Parameter Median 95% CI pd ROPE % in ROPE Rhat ESS
(Intercept) -1.30 [-1.96, -0.67] 100% [-0.18, 0.18] 0% 1.000 4965.00
HardnessTRUE 0.06 [-0.42, 0.55] 61.05% [-0.18, 0.18] 54.76% 0.999 5225.00
MentalContrastTRUE 0.10 [-0.37, 0.56] 66.27% [-0.18, 0.18] 53.16% 1.000 5032.00
GrowthMindsetTRUE -0.24 [-0.70, 0.22] 83.95% [-0.18, 0.18] 38.82% 1.000 4639.00
UtilityTRUE 0.24 [-0.24, 0.70] 84.28% [-0.18, 0.18] 38.24% 1.000 4810.00
ExtrinsicMotivationTRUE 0.11 [-0.35, 0.55] 68.10% [-0.18, 0.18] 54.74% 0.999 4845.00
EverydayBitofStudyTRUE -0.32 [-0.79, 0.17] 90.58% [-0.18, 0.18] 26.82% 0.999 4806.00
EverydayExtraProblemTRUE 0.21 [-0.25, 0.67] 80.95% [-0.18, 0.18] 42.37% 0.999 5217.00
Code
library(dplyr) # pipe, mutate, rows_append
library(tidyr) # gather(), should use pivot_longer actually
library(ggplot2) #draw stuff
library(ggdist) #uncertainty geoms

Attaching package: 'ggdist'
The following object is masked from 'package:bayestestR':

    hdi
Code
library(gt)
# get_parameters(m1) %>%  
#   gather() %>% 
#   mutate(model = "study 1") %>% 
#   rows_append(get_parameters(m2) %>%  
#   gather() %>% 
#   mutate(model = "study 2")) %>% 
#   rows_append(get_parameters(m2_wip) %>%  
#   gather() %>% 
#   mutate(model = "study 2 WIP")) %>% 
#   ggplot(aes(y = key, x = value, fill = model,
#              group=model, side=ifelse(model == "study 2", "left", "right"))) +
#   stat_halfeye() +
#   ggtitle("Comparing model 1 and 2 posteriors")

#orientation = 'vertical'

Are study 2 posteriors more narrow?

Code
get_parameters(m1) %>%  
  gather() %>% 
  group_by(key) %>% 
  summarise(med = median(value), mad = mad(value)) %>% 
  arrange(desc(key)) %>% 
  full_join(
get_parameters(m2) %>%  
  gather() %>% 
  group_by(key) %>% 
  summarise(med = median(value), mad = mad(value))%>% 
  arrange(desc(key)),
by = c("key"="key")) %>% 
  gt() 
key med.x mad.x med.y mad.y
UtilityTRUE 0.2525755 0.1759858 0.2265651 0.1413766
TimeTRUE 0.1042191 0.1693349 NA NA
MentalContrastTRUE -0.1437140 0.1688660 -0.2363112 0.1400454
HardnessTRUE -0.2018847 0.1802990 -0.1029158 0.1481826
GrowthMindsetTRUE 0.4674130 0.1786771 0.2424320 0.1395079
ExtrinsicMotivationTRUE 0.5026505 0.1720207 0.4263739 0.1394033
(Intercept) -1.2155290 0.2364808 -1.2356021 0.2494122
EverydayExtraProblemTRUE NA NA -0.3456370 0.2503536
EverydayBitofStudyTRUE NA NA -0.2627639 0.2366252

Yes (though not so much)

Let’s visually compare study 1, study 2 (WIP) and study 2 (w/study 1 priors) Figure 1

Code
 get_parameters(m1) %>%  
  gather() %>% 
  mutate(model = "study 1") %>% 
  rows_append(get_parameters(m2) %>%  
  gather() %>% 
  mutate(model = "study 2")) %>% 
  rows_append(get_parameters(m2_wip) %>%  
  gather() %>% 
  mutate(model = "study 2 WIP")) %>% 
  ggplot(aes(y = key, x = value, fill = model,
             group=model)) +
  stat_gradientinterval(position = "dodge") +
  ylab(NULL) +  xlab(NULL) + 
  xlim(c(-2.8,1.54)) +
  theme(legend.position="bottom") +
   theme(axis.text.y = 
           element_text(vjust = -0,margin =                                    margin(l = 20, r = -220)),
         axis.text=element_text(size=20),
         legend.margin=margin(t = -60, l=400),
         legend.text=element_text(size=20),) + geom_vline(xintercept=0)

Green posteriors (study 2) seem to average both