Power analyses on testing 4-8yo

Power analyses bootstrapped off the data from Study 1a in children (incl memory check).

We can resample various sample sizes from the current data (with replacement), and run the following analyses, to see how many times we get significant effects at each sample size (=statistical power).

Multinomial model predicting responses by condition, age, and their interaction.
Logistic model predicting responses of “Zarpies on Zarpie island” by condition, age, and their interaction.
Logistic model predicting correct responses by condition, age, and their interaction.
Logistic model predicting responses of “Zarpies on Zarpie island” (aka correct response) by age, within the skewed condition only.
Bayesian logistic model predicting responses by condition, age, and their interaction, with Normal(0, 1) prior on the intercept and all fixed effects.
Bayesian logistic model predicting responses of “Zarpies on Zarpie island” by condition, age, and their interaction, with Normal(0, 1) prior on the intercept and all fixed effects.

# set simulation parameters
set.seed(42) # reproducibility

n_sim <- # per sample size
  1000 

sample_sizes_to_test <- c( # total sample sizes
  200, 
  400, 
  600) 

# set criteria for power
alpha <- 0.05
bf_threshold <- 10

# function to run a simulation
run_simulation <- function(sample_size_total){
  
  # resample data
  resampled_data <- bind_rows(
    data_incl_memory_check %>% 
      filter(boarding == "skewed") %>% 
      slice_sample(n = sample_size_total/2, replace = TRUE),
    data_incl_memory_check %>% 
      filter(boarding == "not skewed") %>% 
      slice_sample(n = sample_size_total/2, replace = TRUE)) %>% 
    mutate(observation = row_number())
    
  # fit models
  dv_comp_cond_age <-
    multinom(dv_comp ~ boarding * age_exact, 
             data = resampled_data)
  
  dv_comp_pop_cond_age <-
    glm(dv_comp_pop ~ boarding * age_exact, 
        family = "binomial",
        data = resampled_data)
  
  dv_comp_correct_cond_age <-
    glm(dv_comp_correct ~ boarding * age_exact, 
        family = "binomial",
        data = resampled_data)
  
  dv_comp_pop_skewed_age <-
    glm(dv_comp_pop ~ age_exact, 
        family = "binomial",
        data = resampled_data %>% 
          filter(boarding == "skewed"))
  
  
  
  # dv_comp_cond_age_brm <-
  #   brm(dv_comp ~ boarding * age_exact,
  #       family = categorical,
  #       prior = c(set_prior("normal(0,1)", class = "Intercept",
  #                           dpar = "muthesame"),
  #                 set_prior("normal(0,1)", class = "Intercept",
  #                           dpar = "muZarpiesonZarpieisland"),
  #                 set_prior("normal(0,1)", class = "b",
  #                           dpar = "muthesame"),
  #                 set_prior("normal(0,1)", class = "b",
  #                           dpar = "muZarpiesonZarpieisland")),
  #       data = resampled_data)
  # 
  # dv_comp_cond_age_brm_null <-
  #   brm(dv_comp ~ boarding + age_exact,
  #       family = categorical,
  #       prior = c(set_prior("normal(0,1)", class = "Intercept",
  #                           dpar = "muthesame"),
  #                 set_prior("normal(0,1)", class = "Intercept",
  #                           dpar = "muZarpiesonZarpieisland"),
  #                 set_prior("normal(0,1)", class = "b",
  #                           dpar = "muthesame"),
  #                 set_prior("normal(0,1)", class = "b",
  #                           dpar = "muZarpiesonZarpieisland")),
  #     data = resampled_data)
  # 
  # dv_comp_pop_cond_age_brm <-
  #   brm(dv_comp_pop ~ boarding * age_exact,
  #       family = bernoulli,
  #       prior = c(set_prior("normal(0,1)", class = "Intercept"),
  #                 set_prior("normal(0,1)", class = "b")),
  #       data = resampled_data)
  

  
  # get p-values, bf
  dv_comp_cond_age_anova <- dv_comp_cond_age %>% Anova()
  dv_comp_pop_cond_age_anova <- dv_comp_pop_cond_age %>% Anova()
  dv_comp_correct_cond_age_anova <- dv_comp_correct_cond_age %>% Anova()
  dv_comp_pop_skewed_age_anova <- dv_comp_pop_skewed_age %>% Anova()
    
  dv_comp_cond_age_p <- dv_comp_cond_age_anova["boarding:age_exact", "Pr(>Chisq)"]
  dv_comp_pop_cond_age_p <- dv_comp_pop_cond_age_anova["boarding:age_exact", "Pr(>Chisq)"]
  dv_comp_correct_cond_age_p <- dv_comp_correct_cond_age_anova["boarding:age_exact", "Pr(>Chisq)"]
  dv_comp_pop_skewed_age_p <- dv_comp_pop_skewed_age_anova["age_exact", "Pr(>Chisq)"]
  
  # dv_comp_cond_age_brm_bf <- bayes_factor(dv_comp_cond_age_brm, dv_comp_cond_age_brm_null) %>% .$bf
  # dv_comp_pop_cond_age_brm_bf <- dv_comp_pop_cond_age_brm %>% hypothesis("boardingskewed:age_exact > 0") %>% pluck("hypothesis", "Evid.Ratio")

  return(list(
    "dv_comp_cond_age_p" = dv_comp_cond_age_p, 
    "dv_comp_pop_cond_age_p" = dv_comp_pop_cond_age_p, 
    "dv_comp_correct_cond_age_p" = dv_comp_correct_cond_age_p, 
    "dv_comp_pop_skewed_age_p" = dv_comp_pop_skewed_age_p
    
    # "dv_comp_cond_age_brm_bf" = dv_comp_cond_age_brm_bf, 
    # "dv_comp_pop_cond_age_brm_bf" = dv_comp_pop_cond_age_brm_bf
    )
    )
}


# set up simulation grid with parameters
sim_grid <- 
  crossing(
    alpha = alpha,
    sample_size_total = sample_sizes_to_test, 
    simulation = rep(1:n_sim, length(sample_sizes_to_test)) # __ simulations per each possible sample size
  ) 

# run simulations for each row
sim_results <- sim_grid %>% 
  rowwise() %>%
  mutate(
    vals = list(run_simulation(sample_size_total)),
    
    dv_comp_cond_age_p = vals$dv_comp_cond_age_p, 
    dv_comp_pop_cond_age_p = vals$dv_comp_pop_cond_age_p, 
    dv_comp_correct_cond_age_p = vals$dv_comp_correct_cond_age_p, 
    dv_comp_pop_skewed_age_p = vals$dv_comp_pop_skewed_age_p,
    
    # dv_comp_cond_age_brm_bf = vals$dv_comp_cond_age_brm_bf, 
    # dv_comp_pop_cond_age_brm_bf = vals$dv_comp_pop_cond_age_brm_bf
    
  ) %>%
  ungroup() 


# calculate power
power <- sim_results %>%
  group_by(sample_size_total) %>%
  summarize(
    power_cond_age = mean(dv_comp_cond_age_p < alpha),
    power_pop_cond_age = mean(dv_comp_pop_cond_age_p < alpha),
    power_correct_cond_age = mean(dv_comp_correct_cond_age_p < alpha),
    power_pop_skewed_age = mean(dv_comp_pop_skewed_age_p < alpha),
    
    # power_cond_age_bf = mean(dv_comp_cond_age_brm_bf >= bf_threshold),
    # power_pop_cond_age_bf = mean(dv_comp_pop_cond_age_brm_bf >= bf_threshold)
  ) 

# save results
write_csv(power, "study-1b-children-power-analysis.csv")

total sample size	all responses: condition x age interaction	pop responses: condition x age interaction	correct responses: condition x age interaction	pop responses in skewed condition: age
Power on target effects by total sample size (4-8yo)
based on Study 1a data incl memory check (4-8yo)
200	0.078	0.081	0.293	0.200
400	0.080	0.109	0.497	0.358
600	0.119	0.142	0.681	0.502

Power analyses on testing just 4yo, 7yo

Power analyses bootstrapped off 4yo and 7yo data from Study 1a in children (incl memory check).

We can resample various sample sizes from the particular age group of interest in the current data (including memory check failures, with replacement), and run the following analyses, to see how many times we get significant effects at each sample size (=statistical power).

Within 4yo, multinomial model predicting responses by condition.
Within 4yo, logistic model predicting responses of “Zarpies on Zarpie island” by condition.

Within 7yo, multinomial model predicting responses by condition.
Within 7yo, logistic model predicting responses of “Zarpies on Zarpie island” by condition.
Overall multinomial model predicting responses by condition, age group (4yo vs 7yo) and their interaction, testing for the interaction.
Overall logistic model predicting responses of “Zarpies on Zarpie island” by condition, age group (4yo vs 7yo) and their interaction, testing for the interaction.

# set simulation parameters
set.seed(42) # reproducibility

n_sim <- # per sample size
  # 100
  1000

sample_sizes_to_test <- # total sample sizes
  c(200, 
    400,
    500,
    600) 

# set criteria for power
alpha <- 0.05
bf_threshold <- 10

# function to run a simulation
run_simulation <- function(sample_size_total){
  
  # resample data incl memory check
  resampled_data <- bind_rows(
    data_incl_memory_check %>% 
      filter(boarding == "skewed") %>% 
      filter(age_cat == 4) %>%
      slice_sample(n = sample_size_total/4, replace = TRUE),
    data_incl_memory_check %>% 
      filter(boarding == "not skewed") %>% 
      filter(age_cat == 4) %>%
      slice_sample(n = sample_size_total/4, replace = TRUE),
    data_incl_memory_check %>% 
      filter(boarding == "skewed") %>% 
      filter(age_cat == 7) %>%
      slice_sample(n = sample_size_total/4, replace = TRUE),
    data_incl_memory_check %>% 
      filter(boarding == "not skewed") %>% 
      filter(age_cat == 7) %>%
      slice_sample(n = sample_size_total/4, replace = TRUE),
    ) %>% 
    mutate(observation = row_number())
    
  
  # fit models
  dv_comp_cond_4yo <-
    multinom(dv_comp ~ boarding,
             data = resampled_data %>%
               filter(age_cat == 4))

  dv_comp_pop_cond_4yo <-
    glm(dv_comp_pop ~ boarding,
        family = "binomial",
        data = resampled_data %>%
          filter(age_cat == 4))


  dv_comp_cond_7yo <-
    multinom(dv_comp ~ boarding,
             data = resampled_data %>%
               filter(age_cat == 7))

  dv_comp_pop_cond_7yo <-
    glm(dv_comp_pop ~ boarding,
        family = "binomial",
        data = resampled_data %>%
          filter(age_cat == 7))


  dv_comp_cond_age_cat <-
    multinom(dv_comp ~ boarding * age_cat,
             data = resampled_data)

  dv_comp_pop_cond_age_cat <-
    glm(dv_comp_pop ~ boarding * age_cat,
        family = "binomial",
        data = resampled_data)

  
  # dv_comp_cond_4yo_brm <-
  #   brm(dv_comp ~ boarding,
  #       family = categorical,
  #       prior = c(set_prior("normal(0,1)", class = "Intercept",
  #                           dpar = "muthesame"),
  #                 set_prior("normal(0,1)", class = "Intercept",
  #                           dpar = "muZarpiesonZarpieisland"),
  #                 set_prior("normal(0,1)", class = "b",
  #                           dpar = "muthesame"),
  #                 set_prior("normal(0,1)", class = "b",
  #                           dpar = "muZarpiesonZarpieisland")),
  #       data = resampled_data %>% 
  #         filter(age_cat == 4))
  # 
  # dv_comp_4yo_brm <-
  #   brm(dv_comp ~ 1,
  #       family = categorical,
  #       prior = c(set_prior("normal(0,1)", class = "Intercept",
  #                           dpar = "muthesame"),
  #                 set_prior("normal(0,1)", class = "Intercept",
  #                           dpar = "muZarpiesonZarpieisland")),
  #       data = resampled_data %>% 
  #         filter(age_cat == 4))
  # 
  # 
  # 
  # dv_comp_pop_cond_4yo_brm <-
  #   brm(dv_comp_pop ~ boarding,
  #       family = bernoulli,
  #       prior = c(set_prior("normal(0,1)", class = "Intercept"),
  #                 set_prior("normal(0,1)", class = "b")),
  #       data = resampled_data %>% 
  #         filter(age_cat == 4))

  
  # get p-values, bf
  dv_comp_cond_4yo_anova <- dv_comp_cond_4yo %>% Anova()
  dv_comp_pop_cond_4yo_anova <- dv_comp_pop_cond_4yo %>% Anova()

  dv_comp_cond_7yo_anova <- dv_comp_cond_7yo %>% Anova()
  dv_comp_pop_cond_7yo_anova <- dv_comp_pop_cond_7yo %>% Anova()

  dv_comp_cond_age_cat_anova <- dv_comp_cond_age_cat %>% Anova()
  dv_comp_pop_cond_age_cat_anova <- dv_comp_pop_cond_age_cat %>% Anova()


  dv_comp_cond_4yo_p <- dv_comp_cond_4yo_anova["boarding", "Pr(>Chisq)"]
  dv_comp_pop_cond_4yo_p <- dv_comp_pop_cond_4yo_anova["boarding", "Pr(>Chisq)"]

  # dv_comp_cond_4yo_brm_bf <- 
  #   bayes_factor(dv_comp_cond_4yo_brm, dv_comp_4yo_brm) %>% 
  #   .$bf
  # 
  # dv_comp_pop_cond_4yo_brm_bf <- dv_comp_pop_cond_4yo_brm %>% 
  #   hypothesis("boardingskewed > 0") %>% 
  #   pluck("hypothesis", "Evid.Ratio")
  
  dv_comp_cond_7yo_p <- dv_comp_cond_7yo_anova["boarding", "Pr(>Chisq)"]
  dv_comp_pop_cond_7yo_p <- dv_comp_pop_cond_7yo_anova["boarding", "Pr(>Chisq)"]
  
  dv_comp_cond_age_cat_p <- dv_comp_cond_age_cat_anova["boarding:age_cat", "Pr(>Chisq)"]
  dv_comp_pop_cond_age_cat_p <- dv_comp_pop_cond_age_cat_anova["boarding:age_cat", "Pr(>Chisq)"]
  


  return(
    list("dv_comp_cond_4yo_p" = dv_comp_cond_4yo_p, 
         "dv_comp_pop_cond_4yo_p" = dv_comp_pop_cond_4yo_p,
         
         # "dv_comp_cond_4yo_brm_bf" = dv_comp_cond_4yo_brm_bf, 
         # "dv_comp_pop_cond_4yo_brm_bf" = dv_comp_pop_cond_4yo_brm_bf,
         
         "dv_comp_cond_7yo_p" = dv_comp_cond_7yo_p, 
         "dv_comp_pop_cond_7yo_p" = dv_comp_pop_cond_7yo_p,
         
         "dv_comp_cond_age_cat_p" = dv_comp_cond_age_cat_p,
         "dv_comp_pop_cond_age_cat_p" = dv_comp_pop_cond_age_cat_p
        )
    )
}


# set up simulation grid with parameters
sim_grid <- 
  crossing(
    alpha = alpha,
    sample_size_total = sample_sizes_to_test, 
    simulation = rep(1:n_sim, length(sample_sizes_to_test)) # __ simulations per each possible sample size
  ) 

# run simulations for each row
sim_results <- sim_grid %>% 
  rowwise() %>%
  mutate(
    vals = list(run_simulation(sample_size_total)),
    
    dv_comp_cond_4yo_p = vals$dv_comp_cond_4yo_p, 
    dv_comp_pop_cond_4yo_p = vals$dv_comp_pop_cond_4yo_p,
    
    # dv_comp_cond_4yo_bf = vals$dv_comp_cond_4yo_brm_bf, 
    # dv_comp_pop_cond_4yo_bf = vals$dv_comp_pop_cond_4yo_brm_bf,
         
    dv_comp_cond_7yo_p = vals$dv_comp_cond_7yo_p, 
    dv_comp_pop_cond_7yo_p = vals$dv_comp_pop_cond_7yo_p,
    
    dv_comp_cond_age_cat_p = vals$dv_comp_cond_age_cat_p,
    dv_comp_pop_cond_age_cat_p = vals$dv_comp_pop_cond_age_cat_p
    
  ) %>%
  ungroup() 


# calculate power
power <- sim_results %>%
  group_by(sample_size_total) %>%
  summarize(
    power_cond_4yo = mean(dv_comp_cond_4yo_p < alpha),
    power_pop_cond_4yo = mean(dv_comp_pop_cond_4yo_p < alpha),
    
    # power_cond_4yo_bf = mean(dv_comp_cond_4yo_bf >= bf_threshold),
    # power_pop_cond_4yo_bf = mean(dv_comp_pop_cond_4yo_bf >= bf_threshold),
    
    power_cond_7yo = mean(dv_comp_cond_7yo_p < alpha),
    power_pop_cond_7yo = mean(dv_comp_pop_cond_7yo_p < alpha),
    
    power_cond_age_cat = mean(dv_comp_cond_age_cat_p < alpha),
    power_pop_cond_age_cat = mean(dv_comp_pop_cond_age_cat_p < alpha)
  ) 

# save results
write_csv(power,
          "study-1b-children-power-analysis.csv")

total sample size	4yo all responses: condition	4yo pop responses: condition	7yo all responses: condition	7yo pop responses: condition	all responses: condition x age_cat	pop responses: condition x age_cat
Power on target effects by total sample size (4yo, 7yo only)
based on Study 1a data incl memory check (4yo, 7yo only)
200	0.965	0.048	0.561	0.613	0.949	0.345
400	1.000	0.051	0.835	0.867	1.000	0.600
500	1.000	0.054	0.901	0.919	1.000	0.723
600	1.000	0.054	0.957	0.972	1.000	0.777

Session info

## R version 4.5.2 (2025-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sequoia 15.7.2
## 
## Matrix products: default
## BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] brms_2.23.0      Rcpp_1.1.0       effectsize_1.0.1 emmeans_2.0.0   
##  [5] nnet_7.3-20      lmerTest_3.1-3   lme4_1.1-37      Matrix_1.7-4    
##  [9] car_3.1-3        carData_3.0-5    gt_1.1.0         lubridate_1.9.4 
## [13] forcats_1.0.1    stringr_1.6.0    dplyr_1.1.4      purrr_1.2.0     
## [17] readr_2.1.6      tidyr_1.3.1      tibble_3.3.0     ggplot2_4.0.1   
## [21] tidyverse_2.0.0  here_1.0.2      
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.2.1      farver_2.1.2          loo_2.8.0            
##  [4] S7_0.2.1              fastmap_1.2.0         tensorA_0.36.2.1     
##  [7] TH.data_1.1-5         bayestestR_0.17.0     digest_0.6.38        
## [10] estimability_1.5.1    timechange_0.3.0      lifecycle_1.0.4      
## [13] survival_3.8-3        posterior_1.6.1       magrittr_2.0.4       
## [16] compiler_4.5.2        rlang_1.1.6           sass_0.4.10          
## [19] tools_4.5.2           yaml_2.3.10           knitr_1.50           
## [22] bridgesampling_1.1-2  bit_4.6.0             xml2_1.5.0           
## [25] RColorBrewer_1.1-3    abind_1.4-8           multcomp_1.4-29      
## [28] withr_3.0.2           numDeriv_2016.8-1.1   grid_4.5.2           
## [31] datawizard_1.3.0      xtable_1.8-4          scales_1.4.0         
## [34] MASS_7.3-65           insight_1.4.2         cli_3.6.5            
## [37] mvtnorm_1.3-3         crayon_1.5.3          rmarkdown_2.30       
## [40] reformulas_0.4.2      generics_0.1.4        RcppParallel_5.1.11-1
## [43] rstudioapi_0.17.1     tzdb_0.5.0            parameters_0.28.2    
## [46] minqa_1.2.8           cachem_1.1.0          splines_4.5.2        
## [49] bayesplot_1.14.0      parallel_4.5.2        matrixStats_1.5.0    
## [52] vctrs_0.6.5           boot_1.3-32           sandwich_3.1-1       
## [55] jsonlite_2.0.0        hms_1.1.4             bit64_4.6.0-1        
## [58] Formula_1.2-5         jquerylib_0.1.4       glue_1.8.0           
## [61] nloptr_2.2.1          codetools_0.2-20      distributional_0.5.0 
## [64] stringi_1.8.7         gtable_0.3.6          pillar_1.11.1        
## [67] Brobdingnag_1.2-9     htmltools_0.5.8.1     R6_2.6.1             
## [70] Rdpack_2.6.4          rprojroot_2.1.1       vroom_1.6.6          
## [73] evaluate_1.0.5        lattice_0.22-7        backports_1.5.0      
## [76] rbibutils_2.4         bslib_0.9.0           rstantools_2.5.0     
## [79] checkmate_2.3.3       coda_0.19-4.1         nlme_3.1-168         
## [82] xfun_0.54             fs_1.6.6              zoo_1.8-14           
## [85] pkgconfig_2.0.3

Structural skew

Study 1b children power analysis

Marianna Zhang

2025-11-21

Power analyses on testing 4-8yo

Power analyses on testing just 4yo, 7yo

Session info