Power analyses bootstrapped off the data from Study 1a in children (incl memory check).
We can resample various sample sizes from the current data (with replacement), and run the following analyses, to see how many times we get significant effects at each sample size (=statistical power).
Multinomial model predicting responses by condition, age, and their interaction.
Logistic model predicting responses of “Zarpies on Zarpie island” by condition, age, and their interaction.
Logistic model predicting correct responses by condition, age, and their interaction.
Logistic model predicting responses of “Zarpies on Zarpie island” (aka correct response) by age, within the skewed condition only.
Bayesian logistic model predicting responses by condition, age, and their interaction, with Normal(0, 1) prior on the intercept and all fixed effects.
Bayesian logistic model predicting responses of “Zarpies on Zarpie island” by condition, age, and their interaction, with Normal(0, 1) prior on the intercept and all fixed effects.
# set simulation parameters
set.seed(42) # reproducibility
n_sim <- # per sample size
1000
sample_sizes_to_test <- c( # total sample sizes
200,
400,
600)
# set criteria for power
alpha <- 0.05
bf_threshold <- 10
# function to run a simulation
run_simulation <- function(sample_size_total){
# resample data
resampled_data <- bind_rows(
data_incl_memory_check %>%
filter(boarding == "skewed") %>%
slice_sample(n = sample_size_total/2, replace = TRUE),
data_incl_memory_check %>%
filter(boarding == "not skewed") %>%
slice_sample(n = sample_size_total/2, replace = TRUE)) %>%
mutate(observation = row_number())
# fit models
dv_comp_cond_age <-
multinom(dv_comp ~ boarding * age_exact,
data = resampled_data)
dv_comp_pop_cond_age <-
glm(dv_comp_pop ~ boarding * age_exact,
family = "binomial",
data = resampled_data)
dv_comp_correct_cond_age <-
glm(dv_comp_correct ~ boarding * age_exact,
family = "binomial",
data = resampled_data)
dv_comp_pop_skewed_age <-
glm(dv_comp_pop ~ age_exact,
family = "binomial",
data = resampled_data %>%
filter(boarding == "skewed"))
# dv_comp_cond_age_brm <-
# brm(dv_comp ~ boarding * age_exact,
# family = categorical,
# prior = c(set_prior("normal(0,1)", class = "Intercept",
# dpar = "muthesame"),
# set_prior("normal(0,1)", class = "Intercept",
# dpar = "muZarpiesonZarpieisland"),
# set_prior("normal(0,1)", class = "b",
# dpar = "muthesame"),
# set_prior("normal(0,1)", class = "b",
# dpar = "muZarpiesonZarpieisland")),
# data = resampled_data)
#
# dv_comp_cond_age_brm_null <-
# brm(dv_comp ~ boarding + age_exact,
# family = categorical,
# prior = c(set_prior("normal(0,1)", class = "Intercept",
# dpar = "muthesame"),
# set_prior("normal(0,1)", class = "Intercept",
# dpar = "muZarpiesonZarpieisland"),
# set_prior("normal(0,1)", class = "b",
# dpar = "muthesame"),
# set_prior("normal(0,1)", class = "b",
# dpar = "muZarpiesonZarpieisland")),
# data = resampled_data)
#
# dv_comp_pop_cond_age_brm <-
# brm(dv_comp_pop ~ boarding * age_exact,
# family = bernoulli,
# prior = c(set_prior("normal(0,1)", class = "Intercept"),
# set_prior("normal(0,1)", class = "b")),
# data = resampled_data)
# get p-values, bf
dv_comp_cond_age_anova <- dv_comp_cond_age %>% Anova()
dv_comp_pop_cond_age_anova <- dv_comp_pop_cond_age %>% Anova()
dv_comp_correct_cond_age_anova <- dv_comp_correct_cond_age %>% Anova()
dv_comp_pop_skewed_age_anova <- dv_comp_pop_skewed_age %>% Anova()
dv_comp_cond_age_p <- dv_comp_cond_age_anova["boarding:age_exact", "Pr(>Chisq)"]
dv_comp_pop_cond_age_p <- dv_comp_pop_cond_age_anova["boarding:age_exact", "Pr(>Chisq)"]
dv_comp_correct_cond_age_p <- dv_comp_correct_cond_age_anova["boarding:age_exact", "Pr(>Chisq)"]
dv_comp_pop_skewed_age_p <- dv_comp_pop_skewed_age_anova["age_exact", "Pr(>Chisq)"]
# dv_comp_cond_age_brm_bf <- bayes_factor(dv_comp_cond_age_brm, dv_comp_cond_age_brm_null) %>% .$bf
# dv_comp_pop_cond_age_brm_bf <- dv_comp_pop_cond_age_brm %>% hypothesis("boardingskewed:age_exact > 0") %>% pluck("hypothesis", "Evid.Ratio")
return(list(
"dv_comp_cond_age_p" = dv_comp_cond_age_p,
"dv_comp_pop_cond_age_p" = dv_comp_pop_cond_age_p,
"dv_comp_correct_cond_age_p" = dv_comp_correct_cond_age_p,
"dv_comp_pop_skewed_age_p" = dv_comp_pop_skewed_age_p
# "dv_comp_cond_age_brm_bf" = dv_comp_cond_age_brm_bf,
# "dv_comp_pop_cond_age_brm_bf" = dv_comp_pop_cond_age_brm_bf
)
)
}
# set up simulation grid with parameters
sim_grid <-
crossing(
alpha = alpha,
sample_size_total = sample_sizes_to_test,
simulation = rep(1:n_sim, length(sample_sizes_to_test)) # __ simulations per each possible sample size
)
# run simulations for each row
sim_results <- sim_grid %>%
rowwise() %>%
mutate(
vals = list(run_simulation(sample_size_total)),
dv_comp_cond_age_p = vals$dv_comp_cond_age_p,
dv_comp_pop_cond_age_p = vals$dv_comp_pop_cond_age_p,
dv_comp_correct_cond_age_p = vals$dv_comp_correct_cond_age_p,
dv_comp_pop_skewed_age_p = vals$dv_comp_pop_skewed_age_p,
# dv_comp_cond_age_brm_bf = vals$dv_comp_cond_age_brm_bf,
# dv_comp_pop_cond_age_brm_bf = vals$dv_comp_pop_cond_age_brm_bf
) %>%
ungroup()
# calculate power
power <- sim_results %>%
group_by(sample_size_total) %>%
summarize(
power_cond_age = mean(dv_comp_cond_age_p < alpha),
power_pop_cond_age = mean(dv_comp_pop_cond_age_p < alpha),
power_correct_cond_age = mean(dv_comp_correct_cond_age_p < alpha),
power_pop_skewed_age = mean(dv_comp_pop_skewed_age_p < alpha),
# power_cond_age_bf = mean(dv_comp_cond_age_brm_bf >= bf_threshold),
# power_pop_cond_age_bf = mean(dv_comp_pop_cond_age_brm_bf >= bf_threshold)
)
# save results
write_csv(power, "study-1b-children-power-analysis.csv")
| Power on target effects by total sample size (4-8yo) | ||||
| based on Study 1a data incl memory check (4-8yo) | ||||
| total sample size | all responses: condition x age interaction | pop responses: condition x age interaction | correct responses: condition x age interaction | pop responses in skewed condition: age |
|---|---|---|---|---|
| 200 | 0.078 | 0.081 | 0.293 | 0.200 |
| 400 | 0.080 | 0.109 | 0.497 | 0.358 |
| 600 | 0.119 | 0.142 | 0.681 | 0.502 |
Power analyses bootstrapped off 4yo and 7yo data from Study 1a in children (incl memory check).
We can resample various sample sizes from the particular age group of interest in the current data (including memory check failures, with replacement), and run the following analyses, to see how many times we get significant effects at each sample size (=statistical power).
Within 4yo, multinomial model predicting responses by condition.
Within 4yo, logistic model predicting responses of “Zarpies on Zarpie island” by condition.
Within 7yo, multinomial model predicting responses by condition.
Within 7yo, logistic model predicting responses of “Zarpies on Zarpie island” by condition.
Overall multinomial model predicting responses by condition, age group (4yo vs 7yo) and their interaction, testing for the interaction.
Overall logistic model predicting responses of “Zarpies on Zarpie island” by condition, age group (4yo vs 7yo) and their interaction, testing for the interaction.
# set simulation parameters
set.seed(42) # reproducibility
n_sim <- # per sample size
# 100
1000
sample_sizes_to_test <- # total sample sizes
c(200,
400,
500,
600)
# set criteria for power
alpha <- 0.05
bf_threshold <- 10
# function to run a simulation
run_simulation <- function(sample_size_total){
# resample data incl memory check
resampled_data <- bind_rows(
data_incl_memory_check %>%
filter(boarding == "skewed") %>%
filter(age_cat == 4) %>%
slice_sample(n = sample_size_total/4, replace = TRUE),
data_incl_memory_check %>%
filter(boarding == "not skewed") %>%
filter(age_cat == 4) %>%
slice_sample(n = sample_size_total/4, replace = TRUE),
data_incl_memory_check %>%
filter(boarding == "skewed") %>%
filter(age_cat == 7) %>%
slice_sample(n = sample_size_total/4, replace = TRUE),
data_incl_memory_check %>%
filter(boarding == "not skewed") %>%
filter(age_cat == 7) %>%
slice_sample(n = sample_size_total/4, replace = TRUE),
) %>%
mutate(observation = row_number())
# fit models
dv_comp_cond_4yo <-
multinom(dv_comp ~ boarding,
data = resampled_data %>%
filter(age_cat == 4))
dv_comp_pop_cond_4yo <-
glm(dv_comp_pop ~ boarding,
family = "binomial",
data = resampled_data %>%
filter(age_cat == 4))
dv_comp_cond_7yo <-
multinom(dv_comp ~ boarding,
data = resampled_data %>%
filter(age_cat == 7))
dv_comp_pop_cond_7yo <-
glm(dv_comp_pop ~ boarding,
family = "binomial",
data = resampled_data %>%
filter(age_cat == 7))
dv_comp_cond_age_cat <-
multinom(dv_comp ~ boarding * age_cat,
data = resampled_data)
dv_comp_pop_cond_age_cat <-
glm(dv_comp_pop ~ boarding * age_cat,
family = "binomial",
data = resampled_data)
# dv_comp_cond_4yo_brm <-
# brm(dv_comp ~ boarding,
# family = categorical,
# prior = c(set_prior("normal(0,1)", class = "Intercept",
# dpar = "muthesame"),
# set_prior("normal(0,1)", class = "Intercept",
# dpar = "muZarpiesonZarpieisland"),
# set_prior("normal(0,1)", class = "b",
# dpar = "muthesame"),
# set_prior("normal(0,1)", class = "b",
# dpar = "muZarpiesonZarpieisland")),
# data = resampled_data %>%
# filter(age_cat == 4))
#
# dv_comp_4yo_brm <-
# brm(dv_comp ~ 1,
# family = categorical,
# prior = c(set_prior("normal(0,1)", class = "Intercept",
# dpar = "muthesame"),
# set_prior("normal(0,1)", class = "Intercept",
# dpar = "muZarpiesonZarpieisland")),
# data = resampled_data %>%
# filter(age_cat == 4))
#
#
#
# dv_comp_pop_cond_4yo_brm <-
# brm(dv_comp_pop ~ boarding,
# family = bernoulli,
# prior = c(set_prior("normal(0,1)", class = "Intercept"),
# set_prior("normal(0,1)", class = "b")),
# data = resampled_data %>%
# filter(age_cat == 4))
# get p-values, bf
dv_comp_cond_4yo_anova <- dv_comp_cond_4yo %>% Anova()
dv_comp_pop_cond_4yo_anova <- dv_comp_pop_cond_4yo %>% Anova()
dv_comp_cond_7yo_anova <- dv_comp_cond_7yo %>% Anova()
dv_comp_pop_cond_7yo_anova <- dv_comp_pop_cond_7yo %>% Anova()
dv_comp_cond_age_cat_anova <- dv_comp_cond_age_cat %>% Anova()
dv_comp_pop_cond_age_cat_anova <- dv_comp_pop_cond_age_cat %>% Anova()
dv_comp_cond_4yo_p <- dv_comp_cond_4yo_anova["boarding", "Pr(>Chisq)"]
dv_comp_pop_cond_4yo_p <- dv_comp_pop_cond_4yo_anova["boarding", "Pr(>Chisq)"]
# dv_comp_cond_4yo_brm_bf <-
# bayes_factor(dv_comp_cond_4yo_brm, dv_comp_4yo_brm) %>%
# .$bf
#
# dv_comp_pop_cond_4yo_brm_bf <- dv_comp_pop_cond_4yo_brm %>%
# hypothesis("boardingskewed > 0") %>%
# pluck("hypothesis", "Evid.Ratio")
dv_comp_cond_7yo_p <- dv_comp_cond_7yo_anova["boarding", "Pr(>Chisq)"]
dv_comp_pop_cond_7yo_p <- dv_comp_pop_cond_7yo_anova["boarding", "Pr(>Chisq)"]
dv_comp_cond_age_cat_p <- dv_comp_cond_age_cat_anova["boarding:age_cat", "Pr(>Chisq)"]
dv_comp_pop_cond_age_cat_p <- dv_comp_pop_cond_age_cat_anova["boarding:age_cat", "Pr(>Chisq)"]
return(
list("dv_comp_cond_4yo_p" = dv_comp_cond_4yo_p,
"dv_comp_pop_cond_4yo_p" = dv_comp_pop_cond_4yo_p,
# "dv_comp_cond_4yo_brm_bf" = dv_comp_cond_4yo_brm_bf,
# "dv_comp_pop_cond_4yo_brm_bf" = dv_comp_pop_cond_4yo_brm_bf,
"dv_comp_cond_7yo_p" = dv_comp_cond_7yo_p,
"dv_comp_pop_cond_7yo_p" = dv_comp_pop_cond_7yo_p,
"dv_comp_cond_age_cat_p" = dv_comp_cond_age_cat_p,
"dv_comp_pop_cond_age_cat_p" = dv_comp_pop_cond_age_cat_p
)
)
}
# set up simulation grid with parameters
sim_grid <-
crossing(
alpha = alpha,
sample_size_total = sample_sizes_to_test,
simulation = rep(1:n_sim, length(sample_sizes_to_test)) # __ simulations per each possible sample size
)
# run simulations for each row
sim_results <- sim_grid %>%
rowwise() %>%
mutate(
vals = list(run_simulation(sample_size_total)),
dv_comp_cond_4yo_p = vals$dv_comp_cond_4yo_p,
dv_comp_pop_cond_4yo_p = vals$dv_comp_pop_cond_4yo_p,
# dv_comp_cond_4yo_bf = vals$dv_comp_cond_4yo_brm_bf,
# dv_comp_pop_cond_4yo_bf = vals$dv_comp_pop_cond_4yo_brm_bf,
dv_comp_cond_7yo_p = vals$dv_comp_cond_7yo_p,
dv_comp_pop_cond_7yo_p = vals$dv_comp_pop_cond_7yo_p,
dv_comp_cond_age_cat_p = vals$dv_comp_cond_age_cat_p,
dv_comp_pop_cond_age_cat_p = vals$dv_comp_pop_cond_age_cat_p
) %>%
ungroup()
# calculate power
power <- sim_results %>%
group_by(sample_size_total) %>%
summarize(
power_cond_4yo = mean(dv_comp_cond_4yo_p < alpha),
power_pop_cond_4yo = mean(dv_comp_pop_cond_4yo_p < alpha),
# power_cond_4yo_bf = mean(dv_comp_cond_4yo_bf >= bf_threshold),
# power_pop_cond_4yo_bf = mean(dv_comp_pop_cond_4yo_bf >= bf_threshold),
power_cond_7yo = mean(dv_comp_cond_7yo_p < alpha),
power_pop_cond_7yo = mean(dv_comp_pop_cond_7yo_p < alpha),
power_cond_age_cat = mean(dv_comp_cond_age_cat_p < alpha),
power_pop_cond_age_cat = mean(dv_comp_pop_cond_age_cat_p < alpha)
)
# save results
write_csv(power,
"study-1b-children-power-analysis.csv")
| Power on target effects by total sample size (4yo, 7yo only) | ||||||
| based on Study 1a data incl memory check (4yo, 7yo only) | ||||||
| total sample size | 4yo all responses: condition | 4yo pop responses: condition | 7yo all responses: condition | 7yo pop responses: condition | all responses: condition x age_cat | pop responses: condition x age_cat |
|---|---|---|---|---|---|---|
| 200 | 0.965 | 0.048 | 0.561 | 0.613 | 0.949 | 0.345 |
| 400 | 1.000 | 0.051 | 0.835 | 0.867 | 1.000 | 0.600 |
| 500 | 1.000 | 0.054 | 0.901 | 0.919 | 1.000 | 0.723 |
| 600 | 1.000 | 0.054 | 0.957 | 0.972 | 1.000 | 0.777 |
## R version 4.5.2 (2025-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sequoia 15.7.2
##
## Matrix products: default
## BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] brms_2.23.0 Rcpp_1.1.0 effectsize_1.0.1 emmeans_2.0.0
## [5] nnet_7.3-20 lmerTest_3.1-3 lme4_1.1-37 Matrix_1.7-4
## [9] car_3.1-3 carData_3.0-5 gt_1.1.0 lubridate_1.9.4
## [13] forcats_1.0.1 stringr_1.6.0 dplyr_1.1.4 purrr_1.2.0
## [17] readr_2.1.6 tidyr_1.3.1 tibble_3.3.0 ggplot2_4.0.1
## [21] tidyverse_2.0.0 here_1.0.2
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.2.1 farver_2.1.2 loo_2.8.0
## [4] S7_0.2.1 fastmap_1.2.0 tensorA_0.36.2.1
## [7] TH.data_1.1-5 bayestestR_0.17.0 digest_0.6.38
## [10] estimability_1.5.1 timechange_0.3.0 lifecycle_1.0.4
## [13] survival_3.8-3 posterior_1.6.1 magrittr_2.0.4
## [16] compiler_4.5.2 rlang_1.1.6 sass_0.4.10
## [19] tools_4.5.2 yaml_2.3.10 knitr_1.50
## [22] bridgesampling_1.1-2 bit_4.6.0 xml2_1.5.0
## [25] RColorBrewer_1.1-3 abind_1.4-8 multcomp_1.4-29
## [28] withr_3.0.2 numDeriv_2016.8-1.1 grid_4.5.2
## [31] datawizard_1.3.0 xtable_1.8-4 scales_1.4.0
## [34] MASS_7.3-65 insight_1.4.2 cli_3.6.5
## [37] mvtnorm_1.3-3 crayon_1.5.3 rmarkdown_2.30
## [40] reformulas_0.4.2 generics_0.1.4 RcppParallel_5.1.11-1
## [43] rstudioapi_0.17.1 tzdb_0.5.0 parameters_0.28.2
## [46] minqa_1.2.8 cachem_1.1.0 splines_4.5.2
## [49] bayesplot_1.14.0 parallel_4.5.2 matrixStats_1.5.0
## [52] vctrs_0.6.5 boot_1.3-32 sandwich_3.1-1
## [55] jsonlite_2.0.0 hms_1.1.4 bit64_4.6.0-1
## [58] Formula_1.2-5 jquerylib_0.1.4 glue_1.8.0
## [61] nloptr_2.2.1 codetools_0.2-20 distributional_0.5.0
## [64] stringi_1.8.7 gtable_0.3.6 pillar_1.11.1
## [67] Brobdingnag_1.2-9 htmltools_0.5.8.1 R6_2.6.1
## [70] Rdpack_2.6.4 rprojroot_2.1.1 vroom_1.6.6
## [73] evaluate_1.0.5 lattice_0.22-7 backports_1.5.0
## [76] rbibutils_2.4 bslib_0.9.0 rstantools_2.5.0
## [79] checkmate_2.3.3 coda_0.19-4.1 nlme_3.1-168
## [82] xfun_0.54 fs_1.6.6 zoo_1.8-14
## [85] pkgconfig_2.0.3