Load necessary packages

library(tidyverse) # For data manipulation and visualization
library(effectsize) # For effect size calculation
library(plyr) # For data manipulation
library(ggpubr) # For organising plots
library(lmerTest) # For fitting lmm models
library(ordinal) # For fitting clmm models
library(emmeans) # For testing contrasts in clmm models and obtain p-values
library(kableExtra) # For tables

Set random seed for reproducibility

set.seed(1)

Simulate a population

This is a simulated a population (N = 60,000) in which the effect size of the (paired) difference between two conditions (A and B) is d = 0.4

Create simulated dataset with two columns (A and B) representing two conditions with different binomial probabilities

dat <- tibble(A = rbinom(60000, size = 4, prob = 0.445) + 1,
              B = rbinom(60000, size = 4, prob = 0.545) + 1,
              Participant = rep(paste0("P", 1:10000), each = 6),
              Pairs = rep(1:6, times = 10000))

# Alternative with less "normal" looking distributions for A and B
p_plausible <- c(.25, .07, .02, .03, .25)

dat2 <- clmm_generate_data(n_participants = 10000,
                          n_trials = 6,
                          control_distribution = p_plausible,
                          effect = 1,
                          participant_variation = 1,
                          within_subject = TRUE,
                          control_weight = .5) %>%
  mutate(group = ifelse(group == 0, "A", "B")) %>%
  dplyr::rename(Value = pas) %>%
  dplyr::rename(Condition = group) %>%
  dplyr::rename(Participant = id) %>%
  mutate(Participant = paste0("p", Participant)) %>%
  select(1:3) %>%
  mutate(pair = rep(1:6, times = 20000)) %>%
  pivot_wider(names_from = Condition, values_from = Value)

Transform data into long format

dat.long <- dat %>% 
  pivot_longer(cols = A:B, names_to = "Condition", values_to = "Value")
dat.long2 <- dat2 %>% 
  pivot_longer(cols = A:B, names_to = "Condition", values_to = "Value")

Data frame of participant names

part <- tibble(Participant = unique(dat$Participant))
part2 <- tibble(Participant = unique(dat2$Participant))

Calculate the mean and standard deviation of “Value” for each condition

mu <- dat.long %>% 
  group_by(Condition) %>%    # Condition the data by the "Condition" column
  dplyr::summarise(     # Summarize the data by calculating the mean and standard deviation
    Mean = mean(as.numeric(Value)), # Mean of "Value" for each condition
    SD= sd(as.numeric(Value)))       # Standard deviation of "Value" for each condition

mu2 <- dat.long2 %>% 
  group_by(Condition) %>%    # Condition the data by the "Condition" column
  dplyr::summarise(     # Summarize the data by calculating the mean and standard deviation
    Mean = mean(as.numeric(Value)), # Mean of "Value" for each condition
    SD= sd(as.numeric(Value))) 

# Print table
rbind(mu, mu2) %>%
  mutate(Distribution = c("Modal", "Modal", "Bimodal", "Bimodal")) %>%
  select(4, 1:3) %>%
  kable(digits = 2) %>%
  collapse_rows() %>%
  kable_paper("hover", full_width = F)

Distribution	Condition	Mean	SD
Modal	A	2.78	1.00
Modal	B	3.19	1.00
Bimodal	A	3.60	1.78
Bimodal	B	4.25	1.45

Compute effect size (Cohen’s d) between the two conditions

# Modal
general.d <- cohens_d(x = dat$A, y = dat$B,
                     method = "paired")
general.d <- interpret_cohens_d(general.d)

general.rb <- rank_biserial(Value ~ Condition,
                    data = dat.long, 
                    paired = TRUE)
general.rb <- interpret_rank_biserial(general.rb)

general.w <- kendalls_w(Value ~ Condition | Participant,
                        data = dat.long[1:6000,],
                        verbose = FALSE)
general.w <- interpret_kendalls_w(general.w)

general.rws <- rank_eta_squared(Value ~ Condition,
                                data = dat.long)
general.rws <- interpret_eta_squared(general.rws)

# Bimodal
general.d2 <- cohens_d(x = dat2$A, y = dat2$B,
                     method = "paired")
general.d2 <- interpret_cohens_d(general.d2)

general.rb2 <- rank_biserial(Value ~ Condition,
                    data = dat.long2, 
                    paired = TRUE)
general.rb2 <- interpret_rank_biserial(general.rb2)

general.w2 <- kendalls_w(Value ~ Condition | Participant,
                         data = dat.long2[1:6000,],
                         verbose = FALSE)
general.w2 <- interpret_kendalls_w(general.w2)

general.rws2 <- rank_eta_squared(Value ~ Condition,
                                data = dat.long2)
general.rws2 <- interpret_eta_squared(general.rws2)

Plot histogram of data with vertical lines for mean of each condition and label for effect size

p1a <- ggplot(dat.long, aes(x = Value, fill = Condition, colour = Condition)) +
  geom_histogram(alpha = 0.3, position = "identity", binwidth = 1) +
  geom_vline(data = mu, aes(xintercept = Mean, color = Condition),
             linetype = "dashed") +
  labs(x = "Score", y = "Count", title = "Modal") +
  annotate("text", x = 2, y = 30000, 
           label = paste0("Cohen's d = ", 
                          round(general.d$Cohens_d, 2),
                          ", 95%CI [",
                          round(general.d$CI_low, 2),
                          " — ",
                          round(general.d$CI_high, 2),
                          "]")) +
  annotate("text", x = 2, y = 28000, 
           label = paste0("r (Rank biserial) = ", 
                          round(general.rb$r_rank_biserial, 2),
                          ", 95%CI [",
                          round(general.rb$CI_low, 2),
                          " — ",
                          round(general.rb$CI_high, 2),
                          "]")) +
  annotate("text", x = 2, y = 26000, 
           label = paste0("Kendall's W = ", 
                          round(general.w$Kendalls_W, 2),
                          ", 95%CI [",
                          round(general.w$CI_low, 2),
                          " — ",
                          round(general.w$CI_high, 2),
                          "]")) +
  annotate("text", x = 2, y = 24000, 
           label = paste0("Rank eta squared = ", 
                          round(general.rws$rank_eta_squared, 2),
                          ", 95%CI [",
                          round(general.rws$CI_low, 2),
                          " — ",
                          round(general.rws$CI_high, 2),
                          "]"))

p1b <- ggplot(dat.long2, aes(x = Value, fill = Condition, colour = Condition)) +
  geom_histogram(alpha = 0.3, position = "identity", binwidth = 1) +
  geom_vline(data = mu2, aes(xintercept = Mean, color = Condition),
             linetype = "dashed") +
  labs(x = "Score", y = "Count", title = "Bimodal") +
  annotate("text", x = 2, y = 50000, 
           label = paste0("Cohen's d = ", 
                          round(general.d2$Cohens_d, 2),
                          ", 95%CI [",
                          round(general.d2$CI_low, 2),
                          " — ",
                          round(general.d2$CI_high, 2),
                          "]")) +
  annotate("text", x = 2, y = 47000, 
           label = paste0("r (Rank biserial) = ", 
                          round(general.rb2$r_rank_biserial, 2),
                          ", 95%CI [",
                          round(general.rb2$CI_low, 2),
                          " — ",
                          round(general.rb2$CI_high, 2),
                          "]")) +
  annotate("text", x = 2, y = 44000, 
           label = paste0("Kendall's W = ", 
                          round(general.w2$Kendalls_W, 2),
                          ", 95%CI [",
                          round(general.w2$CI_low, 2),
                          " — ",
                          round(general.w2$CI_high, 2),
                          "]")) +
  annotate("text", x = 2, y = 41000, 
           label = paste0("Rank eta squared = ", 
                          round(general.rws2$rank_eta_squared, 2),
                          ", 95%CI [",
                          round(general.rws2$CI_low, 2),
                          " — ",
                          round(general.rws2$CI_high, 2),
                          "]"))

ggarrange(p1a, p1b,
          nrow = 2,
          common.legend = TRUE,
          legend = "bottom")

Histogram of the two conditions (A, B). Vertical dashed lines represente the mean of each condition.

Power analysis from t-test, Wilcoxon signed-rank tests, and Linear Mixed Models (LMM)

t-tests

Set parameters for simulation

#Change these numbers to see their effects on statistical power
num_sims.t = 1000 # Number of simulations to run
sample_size.t = 82 # Sample size for each simulation
alpha.t = 0.05 # Significance level

Create simulation-based dataset with defined sample size

samples.t <- map_dfr(seq_len(num_sims.t), ~dat %>%
                     sample_n(sample_size.t) %>% 
                     mutate(sample.t = as.factor(.x)))

samples.t2 <- map_dfr(seq_len(num_sims.t), ~dat2 %>%
                     sample_n(sample_size.t) %>% 
                     mutate(sample.t = as.factor(.x)))

Define function to compute paired t-test

compfun <- function(x, y) {
  comp = (t.test(x, y,
                 alternative = "two.sided", paired = TRUE))
}

Perform t-test on each simulated sample and save p-value and effect size (t) to new dataset (comps)

comps.t <- ddply(samples.t, .(sample.t), summarise,
                 t = round(compfun(A, B)$statistic, 2),
                 p = round(compfun(A, B)$p.value, 3),
                 "Statistical signicance" = ifelse(p <= alpha.t, "Significant", "Non-significant"))

comps.t2 <- ddply(samples.t2, .(sample.t), summarise,
                 t = round(compfun(A, B)$statistic, 2),
                 p = round(compfun(A, B)$p.value, 3),
                 "Statistical signicance" = ifelse(p <= alpha.t, "Significant", "Non-significant"))

Calculate power of analysis (i.e., proportion of simulations where p-value is less than alpha)

power.t <- sum(comps.t$`Statistical signicance` == "Significant") / num_sims.t

power.t2 <- sum(comps.t2$`Statistical signicance` == "Significant") / num_sims.t

Plot histogram of p-values with label for power and sample size

p.t1 <- ggplot(comps.t, aes(x = p, fill = `Statistical signicance`)) +
  scale_fill_hue(direction = -1) +
  geom_histogram(bins = 1/alpha.t, breaks = seq(0, 1, alpha.t)) +
  labs(y = "Count", x = "p-value", title = "Paired t-tests", subtitle = "Modal") +
  annotate("text", x = 0.5, y = sum(comps.t$`Statistical signicance` == "Significant") * 0.9, 
           label = paste0("Power = ", round(power.t, 3), "\nSample size = ", sample_size.t))

p.t2 <- ggplot(comps.t2, aes(x = p, fill = `Statistical signicance`)) +
  scale_fill_hue(direction = -1) +
  geom_histogram(bins = 1/alpha.t, breaks = seq(0, 1, alpha.t)) +
  labs(y = "Count", x = "p-value", title = " ",  subtitle = "Bimodal") +
  annotate("text", x = 0.5, y = sum(comps.t2$`Statistical signicance` == "Significant") * 0.9, 
           label = paste0("Power = ", round(power.t2, 3), "\nSample size = ", sample_size.t))

p.t <- ggarrange(p.t1, p.t2,
                 common.legend = TRUE,
                 legend = "bottom")
p.t

Wilcoxon signed-rank tests

Set parameters for simulation

#Change these numbers to see their effects on statistical power
num_sims.wilcox = 1000 # Number of simulations to run
sample_size.wilcox = 83 # Sample size for each simulation
alpha.wilcox = 0.05 # Significance level

Create simulation-based dataset with defined sample size

samples.wilcox <- map_dfr(seq_len(num_sims.wilcox), ~dat %>%
                            sample_n(sample_size.wilcox) %>%
                            mutate(sample.wilcox = as.factor(.x)))

samples.wilcox2 <- map_dfr(seq_len(num_sims.wilcox), ~dat2 %>%
                            sample_n(sample_size.wilcox) %>%
                            mutate(sample.wilcox = as.factor(.x)))

Define function to compute Wilcoxon signed-rank tests

wilcoxfun <- function(x, y) {
  comp = (wilcox.test(x, y,
                      alternative = "two.sided", paired = TRUE))
}

Perform Wilcoxon signed-rank tests on each simulated dataset and save p-value and effect size (V) to new dataset (comps)

wilcox.comps <- ddply(samples.wilcox, .(sample.wilcox), summarise,
                      V = round(wilcoxfun(A, B)$statistic, 2),
                      p = round(wilcoxfun(A, B)$p.value, 3),
                      "Statistical signicance" = ifelse(p <= alpha.wilcox, "Significant", "Non-significant"))

wilcox.comps2 <- ddply(samples.wilcox2, .(sample.wilcox), summarise,
                      V = round(wilcoxfun(A, B)$statistic, 2),
                      p = round(wilcoxfun(A, B)$p.value, 3),
                      "Statistical signicance" = ifelse(p <= alpha.wilcox, "Significant", "Non-significant"))

Calculate power of analysis (i.e., proportion of simulations where p-value is less than alpha)

power.wilcox <- sum(wilcox.comps$`Statistical signicance` == "Significant") / num_sims.wilcox

power.wilcox2 <- sum(wilcox.comps2$`Statistical signicance` == "Significant") / num_sims.wilcox

Plot histogram of p-values with label for power and sample size

p.wilcox1 <- ggplot(wilcox.comps, aes(x = p, fill = `Statistical signicance`)) +
  scale_fill_hue(direction = -1) +
  geom_histogram(bins = 1/alpha.wilcox, breaks = seq(0, 1, alpha.wilcox)) +
  labs(y = "Count", x = "p-value", title = "Wilcoxon signed-rank tests", subtitle = "Modal") +
  annotate("text", x = 0.5, y = sum(wilcox.comps$`Statistical signicance` == "Significant") * 0.9, 
           label = paste0("Power = ", round(power.wilcox, 3), "\nSample size = ", sample_size.wilcox))

p.wilcox2 <- ggplot(wilcox.comps2, aes(x = p, fill = `Statistical signicance`)) +
  scale_fill_hue(direction = -1) +
  geom_histogram(bins = 1/alpha.wilcox, breaks = seq(0, 1, alpha.wilcox)) +
  labs(y = "Count", x = "p-value", title = " ", subtitle = "Bimodal") +
  annotate("text", x = 0.5, y = sum(wilcox.comps2$`Statistical signicance` == "Significant") * 0.9, 
           label = paste0("Power = ", round(power.wilcox2, 3), "\nSample size = ", sample_size.wilcox))

p.wilcox <- ggarrange(p.wilcox1, p.wilcox2,
                      common.legend = TRUE,
                      legend = "bottom")
p.wilcox

Linear Mixed Models

Set parameters for simulation

#Change these numbers to see their effects on statistical power
num_sims.lmm = 1000 # Number of simulations to run
sample_size.lmm = 14 # Sample size for each simulation (number of participants)
alpha.lmm = 0.05 # Significance level

Create simulation-based dataset of participants with defined sample size

samples.lmm <- map_dfr(seq_len(num_sims.lmm), ~part %>%
                         sample_n(sample_size.lmm) %>%
                         mutate(sample.lmm = as.factor(.x)))

samples.lmm2 <- map_dfr(seq_len(num_sims.lmm), ~part2 %>%
                         sample_n(sample_size.lmm) %>%
                         mutate(sample.lmm = as.factor(.x)))

Full simulation-based dataset with all data per particioant, in long format

samples.lmm.long <- left_join(samples.lmm, dat, by = "Participant",
                              relationship = "many-to-many") %>%
  pivot_longer(cols = A:B, names_to = "Condition", values_to = "Value")

samples.lmm.long2 <- left_join(samples.lmm2, dat2, by = "Participant",
                              relationship = "many-to-many") %>%
  pivot_longer(cols = A:B, names_to = "Condition", values_to = "Value")

Define function to compute lmm

lmmfun <- function(y, x, z) {
  mod = (anova(lmer(y ~ x + (1 | z))))
}

Fit linear mixed model on each simulated sample and write p-value to new dataset (lmm.comps)

lmm.comps <- ddply(samples.lmm.long, .(sample.lmm), summarise,
                   p = round(lmmfun(Value, Condition, Participant)$`Pr(>F)`, 3),
                   "Statistical signicance" = ifelse(p <= alpha.lmm, "Significant", "Non-significant"))

lmm.comps2 <- ddply(samples.lmm.long2, .(sample.lmm), summarise,
                   p = round(lmmfun(Value, Condition, Participant)$`Pr(>F)`, 3),
                   "Statistical signicance" = ifelse(p <= alpha.lmm, "Significant", "Non-significant"))

Calculate power of analysis (i.e., proportion of simulations where p-value is less than alpha)

power.lmm <- sum(lmm.comps$`Statistical signicance` == "Significant") / num_sims.lmm

power.lmm2 <- sum(lmm.comps2$`Statistical signicance` == "Significant") / num_sims.lmm

Plot histogram of p-values with label for power and sample size

p.lmm1 <- ggplot(lmm.comps, aes(x = p, fill = `Statistical signicance`)) +
  scale_fill_hue(direction = -1) +
  geom_histogram(bins = 1/alpha.lmm, breaks = seq(0, 1, alpha.lmm)) +
  labs(y = "Count", x = "p-value", title = "Linear Mixed Models. Call: lmer(Value ~ Condition + (1 | Participant))",
       subtitle = "Modal") +
  annotate("text", x = 0.5, y = sum(lmm.comps$`Statistical signicance` == "Significant") * 0.9, 
           label = paste0("Power = ", round(power.lmm, 3), 
                          "\nSample size = ", sample_size.lmm, 
                          " participants\n(6 paired responses per participant)"))

p.lmm2 <- ggplot(lmm.comps2, aes(x = p, fill = `Statistical signicance`)) +
  scale_fill_hue(direction = -1) +
  geom_histogram(bins = 1/alpha.lmm, breaks = seq(0, 1, alpha.lmm)) +
  labs(y = "Count", x = "p-value", title = " ",
       subtitle = "Bimodal") +
  annotate("text", x = 0.5, y = sum(lmm.comps2$`Statistical signicance` == "Significant") * 0.9, 
           label = paste0("Power = ", round(power.lmm2, 3), 
                          "\nSample size = ", sample_size.lmm, 
                          " participants\n(6 paired responses per participant)"))

p.lmm <- ggarrange(p.lmm1, p.lmm2,
                   common.legend = TRUE,
                   legend = "bottom")
p.lmm

Cumulative Link Mixed Models (clmm)

Set parameters for simulation

#Change these numbers to see their effects on statistical power
num_sims.clmm = 1000 # Number of simulations to run
sample_size.clmm = 20 # Sample size for each simulation (number of participants)
alpha.clmm = 0.05 # Significance level

Create simulation-based dataset of participants with defined sample size

samples.clmm <- map_dfr(seq_len(num_sims.clmm), ~part %>%
                         sample_n(sample_size.clmm) %>%
                         mutate(sample.clmm = as.factor(.x)))

samples.clmm2 <- map_dfr(seq_len(num_sims.clmm), ~part2 %>%
                         sample_n(sample_size.clmm) %>%
                         mutate(sample.clmm = as.factor(.x)))

Full simulation-based dataset with all data per participant, in long format

samples.clmm.long <- left_join(samples.clmm, dat, by = "Participant",
                               relationship = "many-to-many") %>%
  pivot_longer(cols = A:B, names_to = "Condition", values_to = "Value") %>%
  mutate(Value = as.factor(Value)) %>%
  mutate(Condition = as.factor(Condition))

samples.clmm.long2 <- left_join(samples.clmm2, dat2, by = "Participant",
                               relationship = "many-to-many") %>%
  pivot_longer(cols = A:B, names_to = "Condition", values_to = "Value") %>%
  mutate(Value = as.factor(Value)) %>%
  mutate(Condition = as.factor(Condition))

Fit cumulative link mixed model on each simulated sample and write p-value to new dataset (clmm.comps)

clmm.comps <- data.frame(Sample = 1:num_sims.clmm) #create empty dataframe

for (i in 1:num_sims.clmm){
  samp <- samples.clmm.long %>%
    filter(sample.clmm == i)
  tryCatch(mod <- clmm(Value ~ Condition + (1 | Participant), data = samp),
           error = function(e) {
             message('Error')
             print(e)
           })
  tryCatch(comp <- emmeans(mod, pairwise~Condition),
           error = function(e) e, NULL)
  clmm.comps$p[i] <- round(data.frame(comp$contrast)$p.value, 3)
  clmm.comps$`Statistical signicance`[i] <- ifelse(clmm.comps$p[i] <= alpha.clmm, "Significant", "Non-significant")
}

clmm.comps2 <- data.frame(Sample = 1:num_sims.clmm) #create empty dataframe

for (i in 1:num_sims.clmm){
  samp2 <- samples.clmm.long2 %>%
    filter(sample.clmm == i)
  tryCatch(mod2 <- clmm(Value ~ Condition + (1 | Participant), data = samp2),
           error = function(e) {
             message('Error')
             print(e)
           })
  tryCatch(comp2 <- emmeans(mod2, pairwise~Condition),
           error = function(e) e, NULL)
  clmm.comps2$p[i] <- round(data.frame(comp2$contrast)$p.value, 3)
  clmm.comps2$`Statistical signicance`[i] <- ifelse(clmm.comps2$p[i] <= alpha.clmm, "Significant", "Non-significant")
}

Calculate power of analysis (i.e., proportion of simulations where p-value is less than alpha)

power.clmm <- sum(clmm.comps$`Statistical signicance` == "Significant") /
  length(unique(samples.clmm.long$sample.clmm))

power.clmm2 <- sum(clmm.comps2$`Statistical signicance` == "Significant") /
  length(unique(samples.clmm.long2$sample.clmm))

Plot histogram of p-values with label for power and sample size

p.clmm1 <- ggplot(clmm.comps, aes(x = p, fill = `Statistical signicance`)) +
  scale_fill_hue(direction = -1) +
  geom_histogram(bins = 1/alpha.clmm, breaks = seq(0, 1, alpha.clmm)) +
  labs(y = "Count", x = "p-value", title = "Cumulative Link Mixed Models. Call: clmm(Value ~ Condition + (1 | Participant))",
       subtitle = "Modal") +
  annotate("text", x = 0.5, y = sum(clmm.comps$`Statistical signicance` == "Significant") * 0.9, 
           label = paste0("Power = ", round(power.clmm, 3), 
                          "\nSample size = ", sample_size.clmm, 
                          " participants\n(6 paired responses per participant)"))

p.clmm2 <- ggplot(clmm.comps2, aes(x = p, fill = `Statistical signicance`)) +
  scale_fill_hue(direction = -1) +
  geom_histogram(bins = 1/alpha.clmm, breaks = seq(0, 1, alpha.clmm)) +
  labs(y = "Count", x = "p-value", title = " ",
       subtitle = "Bimodal") +
  annotate("text", x = 0.5, y = sum(clmm.comps2$`Statistical signicance` == "Significant") * 0.9, 
           label = paste0("Power = ", round(power.clmm2, 3), 
                          "\nSample size = ", sample_size.clmm, 
                          " participants\n(6 paired responses per participant)"))
p.clmm <- ggarrange(p.clmm1, p.clmm2,
                   common.legend = TRUE,
                   legend = "bottom")
p.clmm

Comparison of different models

ggarrange(p.t, p.wilcox, p.lmm, p.clmm,
          nrow = 4,
          common.legend = TRUE)

Simulation-based power analysis for Hadavi et al. 2023, Cross-cultural relationships between music, emotion, and visual imagery: A comparative study of Iran, Canada, and Japan [Stage 1 Registered Report]

Juan David Leongómez

28 June, 2023

Simulate a population

Power analysis from t-test, Wilcoxon signed-rank tests, and Linear Mixed Models (LMM)

t-tests

Wilcoxon signed-rank tests

Linear Mixed Models

Cumulative Link Mixed Models (clmm)

Comparison of different models