Setup

What is power?

Power analysis parameters from Jusczyk & Aslin (1995) Experiment1

set.seed(100)
num_simulations <- 500
alpha <- 0.05 # significance level
sample_sizes <- c(10, 15, 20, 25, 30, 35, 40, 45, 50) # sample sizes to test

mean_fam <- 8.29
sd_mean_fam   <- 2.09
mean_unf <- 7.04
sd_mean_unf   <- 2.63
n_trials <- 8

# Convert Mean SDs to account for trials: SD_trial = SD_mean * sqrt(n_trials)
var_trial_fam <- (sd_mean_fam * sqrt(n_trials))^2
var_trial_unf <- (sd_mean_unf * sqrt(n_trials))^2
avg_var_trial <- (var_trial_fam + var_trial_unf) / 2
# Split the variance: infant vs. trial
within_corr_values <- c(0.3, 0.4, 0.5, 0.6)
# ICC = Subject var/(subject var + residual var)

# --- Simulation Function ---
run_power_sim <- function(n, icc) {
  significant_results <- 0 # Count how many times p < 0.05
  
  # Calculate SDs based on the ICC; SD = sqrt(Var_total * Proportion)
  sd_subject <- sqrt(avg_var_trial * icc)
  sd_residual_fam <- sqrt(var_trial_fam * (1 - icc))
  sd_residual_unf <- sqrt(var_trial_unf * (1 - icc))
  
  for (i in 1:num_simulations) {
    # Create random intercepts
    subj_intercepts <- rnorm(n, 0, sd_subject)
    
    df <- expand.grid(ID = 1:n, Condition = c("Fam", "Unf"), Trial = 1:n_trials) %>%
      mutate(
        # Set means and SDs specific to each condition
        BaseMean = ifelse(Condition == "Fam", mean_fam, mean_unf),
        ResidSD  = ifelse(Condition == "Fam", sd_residual_fam, sd_residual_unf),
        SubjEff  = subj_intercepts[ID],
        # Generate residual noise (the remaining variance not explained by individuals)
        Error    = rnorm(n(), 0, ResidSD),
        LookTime = BaseMean + SubjEff + Error
      )
    
    # Model
    model <- tryCatch(
      lmer(LookTime ~ Condition + (1|ID), data = df),
      error = function(e) return(NULL)
    )
    
    if (!is.null(model)) {
      p_val <- summary(model)$coefficients["ConditionUnf", "Pr(>|t|)"]
      if (p_val < alpha) significant_results <- significant_results + 1
    }
  }
  return(significant_results / num_simulations)
}

# --- Execute Grid Search ---
# This creates a table of every combination
results_grid <- expand.grid(N = sample_sizes, ICC = within_corr_values)

# Running the simulation for each row
#results_grid$Power <- mapply(run_power_sim, results_grid$N, results_grid$ICC)

# Save the R object
#saveRDS(results_grid, "power_analysis_results.rds")
# Load the R object
results_grid <- readRDS("power_analysis_results.rds")

Plot

ggplot(results_grid, aes(x = N, y = Power, color = as.factor(ICC), group = ICC)) +
  # Add the 80% power threshold line
  geom_hline(yintercept = 0.8, linetype = "dashed", color = "red", size = 0.8) +
  # Add the power curves
  geom_line(size = 1.2) +
  geom_point(size = 3) +
  # Formatting and Labels
  scale_y_continuous(labels = scales::percent, limits = c(0, 1.05), breaks = seq(0, 1, 0.1)) +
  scale_x_continuous(breaks = unique(results_grid$N)) +
  scale_color_viridis_d(name = "ICC", option = "C", end = 0.8) +
  labs(
    title = "Power Analysis: Head-Turn Preference Paradigm",
    x = "Sample Size (Number of Infants)",
    y = "Statistical Power"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "right",
    panel.grid.minor = element_blank(),
    plot.title = element_text(face = "bold"),
  )
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

References: https://www.nicolaromano.net/data-thoughts/power-analysis-simulation/
https://osf.io/preprints/psyarxiv/vxfbh_v2