Bayesian Aggregation Simulatons

Author

Ilya Musabirov, Runce Zhang

library(rstanarm)
library(bayestestR)
library(dplyr)
library(insight)

Generating data and setting up simulation for the first study (Week 8 deployment), logistic with default WIP

main_effects = list(Hardness=0, Time=0, MentalContrast=0, GrowthMindset=0, Utility=0, ExtrinsicMotivation=0)

week8_glm <- function(baseline, main_effects, N){
  
  Hardness <- sample(c(T,F), N, replace=TRUE)
  # Hardness = T => solving a hard problem will help you(right now/-) 
  # Hardness = F => solving another problem will help you(right now/-)
  
  Time <- sample(c(T,F), N, replace=TRUE)
  # Time = T => solving (a hard/another) problem will help you right now
  # Time = F => solving (a hard/another) problem will help you
  
  MentalContrast <- sample(c(T,F), N, replace=TRUE) 
  GrowthMindset <- sample(c(T,F), N, replace=TRUE) 
  Utility <- sample(c(T,F), N, replace=TRUE) 
  ExtrinsicMotivation <- sample(c(T,F), N, replace=TRUE) 
  
  y <- rbinom(N, 1, baseline)
  
  y[Hardness==T] <- rbinom(length(y[Hardness==T]), 1, main_effects[["Hardness"]])
  y[Time==T] <- rbinom(length(y[Time==T]), 1, main_effects[["Time"]])
  y[MentalContrast==T] <- rbinom(length(y[MentalContrast==T]), 1, main_effects[["MentalContrast"]])
  y[GrowthMindset==T] <- rbinom(length(y[GrowthMindset==T]), 1, main_effects[["GrowthMindset"]])
  y[Utility==T] <- rbinom(length(y[Utility==T]), 1, main_effects[["Utility"]])
  y[ExtrinsicMotivation==T] <- rbinom(length(y[ExtrinsicMotivation==T]), 1, main_effects[["ExtrinsicMotivation"]])
  
  model <- stan_glm(y ~ Hardness + Time + MentalContrast + GrowthMindset + Utility + ExtrinsicMotivation, family = "binomial") 
  return(model)
}

Testing simulation with lower-ish effects (utility and extrisic motivation are high), 600 students

main_effects = list(Hardness=0.05, Time=0.1, MentalContrast=0.2, GrowthMindset=0.2, Utility=0.3, ExtrinsicMotivation=0.4)

m1 <- week8_glm(0.1, main_effects, 600)

Results (Posterior) for study 1 (weakly informative priors)

describe_posterior(m1)  %>% print_md()

Summary of Posterior Distribution
Parameter	Median	95% CI	pd	ROPE	% in ROPE	Rhat	ESS
(Intercept)	-1.22	[-1.70, -0.74]	100%	[-0.18, 0.18]	0%	0.999	6597.00
HardnessTRUE	-0.20	[-0.55, 0.13]	87.08%	[-0.18, 0.18]	45.26%	1.000	6122.00
TimeTRUE	0.10	[-0.23, 0.46]	72.55%	[-0.18, 0.18]	65.84%	1.000	5400.00
MentalContrastTRUE	-0.14	[-0.49, 0.20]	79.65%	[-0.18, 0.18]	58.68%	0.999	6954.00
GrowthMindsetTRUE	0.47	[ 0.13, 0.81]	99.60%	[-0.18, 0.18]	2.45%	0.999	6348.00
UtilityTRUE	0.25	[-0.09, 0.59]	92.17%	[-0.18, 0.18]	32.76%	0.999	6253.00
ExtrinsicMotivationTRUE	0.50	[ 0.16, 0.84]	99.78%	[-0.18, 0.18]	0.74%	0.999	7051.00

posteriors1 <- get_parameters(m1)

posteriors1 %>% 
  summarise(across(.fns=median)) %>% 
  as.numeric() -> posterior1_location

posteriors1 %>% 
  summarise(across(.fns=mad)) %>% 
  as.numeric() -> posterior1_scale

Generating data and setting up simulations for the second study (Week 12 deployment), logistic with informative priors // with default WIP

week12_glm <- function(baseline, main_effect, N, priors=NULL){
  
  Hardness <- sample(c(T,F), N, replace=TRUE) 
  MentalContrast <- sample(c(T,F), N, replace=TRUE) 
  GrowthMindset <- sample(c(T,F), N, replace=TRUE) 
  Utility <- sample(c(T,F), N, replace=TRUE) 
  ExtrinsicMotivation <- sample(c(T,F), N, replace=TRUE) 
  EverydayBitofStudy <- sample(c(T,F), N, replace=TRUE)  # EBS = T => Study everyday = 1
  EverydayExtraProblem <- sample(c(T,F), N, replace=TRUE)  # EEP = T => Study everyday = 2
  
  y <- rbinom(N, 1, baseline)
  
  y[Hardness==T] <- rbinom(length(y[Hardness==T]), 1, main_effects[["Hardness"]])
  y[MentalContrast==T] <- rbinom(length(y[MentalContrast==T]), 1, main_effects[["MentalContrast"]])
  y[GrowthMindset==T] <- rbinom(length(y[GrowthMindset==T]), 1, main_effects[["GrowthMindset"]])
  y[Utility==T] <- rbinom(length(y[Utility==T]), 1, main_effects[["Utility"]])
  y[ExtrinsicMotivation==T] <- rbinom(length(y[ExtrinsicMotivation==T]), 1, main_effects[["ExtrinsicMotivation"]])
    y[EverydayBitofStudy==T] <- rbinom(length(y[EverydayBitofStudy==T]), 1, main_effects[["EverydayBitofStudy"]])
    y[EverydayExtraProblem==T] <- rbinom(length(y[EverydayExtraProblem==T]), 1, main_effects[["EverydayExtraProblem"]])

if(!is.null(priors)){  
    model <- stan_glm(y ~ Hardness + MentalContrast +
                        GrowthMindset + Utility +
                        ExtrinsicMotivation +
                        EverydayBitofStudy +
                        EverydayExtraProblem, 
                      family = "binomial", prior = priors) 
} else{
      model <- stan_glm(y ~ Hardness + MentalContrast +
                        GrowthMindset + Utility +
                        ExtrinsicMotivation +
                        EverydayBitofStudy +
                        EverydayExtraProblem, 
                      family = "binomial")
}
  return(model)
}

Setting effects a bit lower

main_effects = list(Hardness=0.05, Time=0.1, MentalContrast=0.2, GrowthMindset=0.2, Utility=0.3, ExtrinsicMotivation=0.4, EverydayBitofStudy=0.2, EverydayExtraProblem=0.2)

my_prior <- normal(location = c(posterior1_location[c(2,4,5,6,7)], 0, 0), scale = c(posterior1_scale[c(2,4,5,6,7)], 5, 5))

Setting up models with (partially) informative vs weakly informative priors

m2_wip <- week12_glm(0.1, main_effects, 400, priors = NULL)
m2 <- week12_glm(0.1, main_effects, 400, priors = my_prior)

Checking priors

Code

describe_prior(m2_wip) %>% print_md()

Parameter	Prior_Distribution	Prior_Scale
(Intercept)	normal	2.50
HardnessTRUE	normal	5.01
MentalContrastTRUE	normal	5.01
GrowthMindsetTRUE	normal	5.00
UtilityTRUE	normal	4.99
ExtrinsicMotivationTRUE	normal	5.00
EverydayBitofStudyTRUE	normal	4.99
EverydayExtraProblemTRUE	normal	5.00

Code

describe_prior(m2) %>% print_md()

Parameter	Prior_Distribution	Prior_Location	Prior_Scale
(Intercept)	normal	0.00	2.50
HardnessTRUE	normal	-0.20	0.18
MentalContrastTRUE	normal	-0.14	0.17
GrowthMindsetTRUE	normal	0.47	0.18
UtilityTRUE	normal	0.25	0.18
ExtrinsicMotivationTRUE	normal	0.50	0.17
EverydayBitofStudyTRUE	normal	0.00	5.00
EverydayExtraProblemTRUE	normal	0.00	5.00

Comparing the model posteriors after Study 2:

Study 1

Code

describe_posterior(m1) %>% print_md()

Summary of Posterior Distribution
Parameter	Median	95% CI	pd	ROPE	% in ROPE	Rhat	ESS
(Intercept)	-1.22	[-1.70, -0.74]	100%	[-0.18, 0.18]	0%	0.999	6597.00
HardnessTRUE	-0.20	[-0.55, 0.13]	87.08%	[-0.18, 0.18]	45.26%	1.000	6122.00
TimeTRUE	0.10	[-0.23, 0.46]	72.55%	[-0.18, 0.18]	65.84%	1.000	5400.00
MentalContrastTRUE	-0.14	[-0.49, 0.20]	79.65%	[-0.18, 0.18]	58.68%	0.999	6954.00
GrowthMindsetTRUE	0.47	[ 0.13, 0.81]	99.60%	[-0.18, 0.18]	2.45%	0.999	6348.00
UtilityTRUE	0.25	[-0.09, 0.59]	92.17%	[-0.18, 0.18]	32.76%	0.999	6253.00
ExtrinsicMotivationTRUE	0.50	[ 0.16, 0.84]	99.78%	[-0.18, 0.18]	0.74%	0.999	7051.00

Study 2 w/priors based on Study 1 results

Code

describe_posterior(m2) %>% print_md()

Summary of Posterior Distribution
Parameter	Median	95% CI	pd	ROPE	% in ROPE	Rhat	ESS
(Intercept)	-1.24	[-1.74, -0.74]	100%	[-0.18, 0.18]	0%	1.000	5350.00
HardnessTRUE	-0.10	[-0.40, 0.19]	75.25%	[-0.18, 0.18]	71.39%	1.000	5523.00
MentalContrastTRUE	-0.24	[-0.51, 0.03]	96.00%	[-0.18, 0.18]	33.34%	1.000	5402.00
GrowthMindsetTRUE	0.24	[-0.04, 0.52]	95.50%	[-0.18, 0.18]	32.74%	1.000	4824.00
UtilityTRUE	0.23	[-0.05, 0.50]	94.35%	[-0.18, 0.18]	37.50%	1.000	4372.00
ExtrinsicMotivationTRUE	0.43	[ 0.15, 0.70]	99.92%	[-0.18, 0.18]	1.61%	1.000	4256.00
EverydayBitofStudyTRUE	-0.26	[-0.73, 0.20]	86.15%	[-0.18, 0.18]	35.89%	1.000	5055.00
EverydayExtraProblemTRUE	-0.35	[-0.83, 0.13]	92.17%	[-0.18, 0.18]	24.05%	1.000	4853.00

Study 2 w/weakly informative priors

Code

describe_posterior(m2_wip) %>% print_md()

Summary of Posterior Distribution
Parameter	Median	95% CI	pd	ROPE	% in ROPE	Rhat	ESS
(Intercept)	-1.30	[-1.96, -0.67]	100%	[-0.18, 0.18]	0%	1.000	4965.00
HardnessTRUE	0.06	[-0.42, 0.55]	61.05%	[-0.18, 0.18]	54.76%	0.999	5225.00
MentalContrastTRUE	0.10	[-0.37, 0.56]	66.27%	[-0.18, 0.18]	53.16%	1.000	5032.00
GrowthMindsetTRUE	-0.24	[-0.70, 0.22]	83.95%	[-0.18, 0.18]	38.82%	1.000	4639.00
UtilityTRUE	0.24	[-0.24, 0.70]	84.28%	[-0.18, 0.18]	38.24%	1.000	4810.00
ExtrinsicMotivationTRUE	0.11	[-0.35, 0.55]	68.10%	[-0.18, 0.18]	54.74%	0.999	4845.00
EverydayBitofStudyTRUE	-0.32	[-0.79, 0.17]	90.58%	[-0.18, 0.18]	26.82%	0.999	4806.00
EverydayExtraProblemTRUE	0.21	[-0.25, 0.67]	80.95%	[-0.18, 0.18]	42.37%	0.999	5217.00

Code

library(dplyr) # pipe, mutate, rows_append
library(tidyr) # gather(), should use pivot_longer actually
library(ggplot2) #draw stuff
library(ggdist) #uncertainty geoms


Attaching package: 'ggdist'

The following object is masked from 'package:bayestestR':

    hdi

Code

library(gt)
# get_parameters(m1) %>%  
#   gather() %>% 
#   mutate(model = "study 1") %>% 
#   rows_append(get_parameters(m2) %>%  
#   gather() %>% 
#   mutate(model = "study 2")) %>% 
#   rows_append(get_parameters(m2_wip) %>%  
#   gather() %>% 
#   mutate(model = "study 2 WIP")) %>% 
#   ggplot(aes(y = key, x = value, fill = model,
#              group=model, side=ifelse(model == "study 2", "left", "right"))) +
#   stat_halfeye() +
#   ggtitle("Comparing model 1 and 2 posteriors")

#orientation = 'vertical'

Are study 2 posteriors more narrow?

Code

get_parameters(m1) %>%  
  gather() %>% 
  group_by(key) %>% 
  summarise(med = median(value), mad = mad(value)) %>% 
  arrange(desc(key)) %>% 
  full_join(
get_parameters(m2) %>%  
  gather() %>% 
  group_by(key) %>% 
  summarise(med = median(value), mad = mad(value))%>% 
  arrange(desc(key)),
by = c("key"="key")) %>% 
  gt()

key	med.x	mad.x	med.y	mad.y
UtilityTRUE	0.2525755	0.1759858	0.2265651	0.1413766
TimeTRUE	0.1042191	0.1693349	NA	NA
MentalContrastTRUE	-0.1437140	0.1688660	-0.2363112	0.1400454
HardnessTRUE	-0.2018847	0.1802990	-0.1029158	0.1481826
GrowthMindsetTRUE	0.4674130	0.1786771	0.2424320	0.1395079
ExtrinsicMotivationTRUE	0.5026505	0.1720207	0.4263739	0.1394033
(Intercept)	-1.2155290	0.2364808	-1.2356021	0.2494122
EverydayExtraProblemTRUE	NA	NA	-0.3456370	0.2503536
EverydayBitofStudyTRUE	NA	NA	-0.2627639	0.2366252

Yes (though not so much)

Let’s visually compare study 1, study 2 (WIP) and study 2 (w/study 1 priors) Figure 1

Code

 get_parameters(m1) %>%  
  gather() %>% 
  mutate(model = "study 1") %>% 
  rows_append(get_parameters(m2) %>%  
  gather() %>% 
  mutate(model = "study 2")) %>% 
  rows_append(get_parameters(m2_wip) %>%  
  gather() %>% 
  mutate(model = "study 2 WIP")) %>% 
  ggplot(aes(y = key, x = value, fill = model,
             group=model)) +
  stat_gradientinterval(position = "dodge") +
  ylab(NULL) +  xlab(NULL) + 
  xlim(c(-2.8,1.54)) +
  theme(legend.position="bottom") +
   theme(axis.text.y = 
           element_text(vjust = -0,margin =                                    margin(l = 20, r = -220)),
         axis.text=element_text(size=20),
         legend.margin=margin(t = -60, l=400),
         legend.text=element_text(size=20),) + geom_vline(xintercept=0)

Figure 1: Comparing model 1 and 2 posteriors

Green posteriors (study 2) seem to average both