Load necessary packages

library(tidyverse) # For data manipulation and visualization
library(effectsize) # For effect size calculation
library(plyr) # For data manipulation
library(ggpubr) # For organising plots
library(lmerTest) # For fitting lmm models
library(ordinal) # For fitting clmm models
library(emmeans) # For testing contrasts in clmm models and obtain p-values
library(kableExtra) # For tables

Set random seed for reproducibility

set.seed(1)

Simulate a population

This is a simulated a population (N = 60,000) in which the effect size of the (paired) difference between two conditions (A and B) is d = 0.4

Create simulated dataset with two columns (A and B) representing two conditions with different binomial probabilities

dat <- tibble(A = rbinom(60000, size = 4, prob = 0.445) + 1,
              B = rbinom(60000, size = 4, prob = 0.545) + 1,
              Participant = rep(paste0("P", 1:10000), each = 6),
              Pairs = rep(1:6, times = 10000))

# Alternative with less "normal" looking distributions for A and B
p_plausible <- c(.25, .07, .02, .03, .25)

dat2 <- clmm_generate_data(n_participants = 10000,
                          n_trials = 6,
                          control_distribution = p_plausible,
                          effect = 1,
                          participant_variation = 1,
                          within_subject = TRUE,
                          control_weight = .5) %>%
  mutate(group = ifelse(group == 0, "A", "B")) %>%
  dplyr::rename(Value = pas) %>%
  dplyr::rename(Condition = group) %>%
  dplyr::rename(Participant = id) %>%
  mutate(Participant = paste0("p", Participant)) %>%
  select(1:3) %>%
  mutate(pair = rep(1:6, times = 20000)) %>%
  pivot_wider(names_from = Condition, values_from = Value)

Transform data into long format

dat.long <- dat %>% 
  pivot_longer(cols = A:B, names_to = "Condition", values_to = "Value")
dat.long2 <- dat2 %>% 
  pivot_longer(cols = A:B, names_to = "Condition", values_to = "Value")

Data frame of participant names

part <- tibble(Participant = unique(dat$Participant))
part2 <- tibble(Participant = unique(dat2$Participant))

Calculate the mean and standard deviation of “Value” for each condition

mu <- dat.long %>% 
  group_by(Condition) %>%    # Condition the data by the "Condition" column
  dplyr::summarise(     # Summarize the data by calculating the mean and standard deviation
    Mean = mean(as.numeric(Value)), # Mean of "Value" for each condition
    SD= sd(as.numeric(Value)))       # Standard deviation of "Value" for each condition

mu2 <- dat.long2 %>% 
  group_by(Condition) %>%    # Condition the data by the "Condition" column
  dplyr::summarise(     # Summarize the data by calculating the mean and standard deviation
    Mean = mean(as.numeric(Value)), # Mean of "Value" for each condition
    SD= sd(as.numeric(Value))) 

# Print table
rbind(mu, mu2) %>%
  mutate(Distribution = c("Modal", "Modal", "Bimodal", "Bimodal")) %>%
  select(4, 1:3) %>%
  kable(digits = 2) %>%
  collapse_rows() %>%
  kable_paper("hover", full_width = F)
Distribution Condition Mean SD
Modal A 2.78 1.00
Modal B 3.19 1.00
Bimodal A 3.60 1.78
Bimodal B 4.25 1.45

Compute effect size (Cohen’s d) between the two conditions

# Modal
general.d <- cohens_d(x = dat$A, y = dat$B,
                     method = "paired")
general.d <- interpret_cohens_d(general.d)

general.rb <- rank_biserial(Value ~ Condition,
                    data = dat.long, 
                    paired = TRUE)
general.rb <- interpret_rank_biserial(general.rb)

general.w <- kendalls_w(Value ~ Condition | Participant,
                        data = dat.long[1:6000,],
                        verbose = FALSE)
general.w <- interpret_kendalls_w(general.w)

general.rws <- rank_eta_squared(Value ~ Condition,
                                data = dat.long)
general.rws <- interpret_eta_squared(general.rws)

# Bimodal
general.d2 <- cohens_d(x = dat2$A, y = dat2$B,
                     method = "paired")
general.d2 <- interpret_cohens_d(general.d2)

general.rb2 <- rank_biserial(Value ~ Condition,
                    data = dat.long2, 
                    paired = TRUE)
general.rb2 <- interpret_rank_biserial(general.rb2)

general.w2 <- kendalls_w(Value ~ Condition | Participant,
                         data = dat.long2[1:6000,],
                         verbose = FALSE)
general.w2 <- interpret_kendalls_w(general.w2)

general.rws2 <- rank_eta_squared(Value ~ Condition,
                                data = dat.long2)
general.rws2 <- interpret_eta_squared(general.rws2)

Plot histogram of data with vertical lines for mean of each condition and label for effect size

p1a <- ggplot(dat.long, aes(x = Value, fill = Condition, colour = Condition)) +
  geom_histogram(alpha = 0.3, position = "identity", binwidth = 1) +
  geom_vline(data = mu, aes(xintercept = Mean, color = Condition),
             linetype = "dashed") +
  labs(x = "Score", y = "Count", title = "Modal") +
  annotate("text", x = 2, y = 30000, 
           label = paste0("Cohen's d = ", 
                          round(general.d$Cohens_d, 2),
                          ", 95%CI [",
                          round(general.d$CI_low, 2),
                          " — ",
                          round(general.d$CI_high, 2),
                          "]")) +
  annotate("text", x = 2, y = 28000, 
           label = paste0("r (Rank biserial) = ", 
                          round(general.rb$r_rank_biserial, 2),
                          ", 95%CI [",
                          round(general.rb$CI_low, 2),
                          " — ",
                          round(general.rb$CI_high, 2),
                          "]")) +
  annotate("text", x = 2, y = 26000, 
           label = paste0("Kendall's W = ", 
                          round(general.w$Kendalls_W, 2),
                          ", 95%CI [",
                          round(general.w$CI_low, 2),
                          " — ",
                          round(general.w$CI_high, 2),
                          "]")) +
  annotate("text", x = 2, y = 24000, 
           label = paste0("Rank eta squared = ", 
                          round(general.rws$rank_eta_squared, 2),
                          ", 95%CI [",
                          round(general.rws$CI_low, 2),
                          " — ",
                          round(general.rws$CI_high, 2),
                          "]"))

p1b <- ggplot(dat.long2, aes(x = Value, fill = Condition, colour = Condition)) +
  geom_histogram(alpha = 0.3, position = "identity", binwidth = 1) +
  geom_vline(data = mu2, aes(xintercept = Mean, color = Condition),
             linetype = "dashed") +
  labs(x = "Score", y = "Count", title = "Bimodal") +
  annotate("text", x = 2, y = 50000, 
           label = paste0("Cohen's d = ", 
                          round(general.d2$Cohens_d, 2),
                          ", 95%CI [",
                          round(general.d2$CI_low, 2),
                          " — ",
                          round(general.d2$CI_high, 2),
                          "]")) +
  annotate("text", x = 2, y = 47000, 
           label = paste0("r (Rank biserial) = ", 
                          round(general.rb2$r_rank_biserial, 2),
                          ", 95%CI [",
                          round(general.rb2$CI_low, 2),
                          " — ",
                          round(general.rb2$CI_high, 2),
                          "]")) +
  annotate("text", x = 2, y = 44000, 
           label = paste0("Kendall's W = ", 
                          round(general.w2$Kendalls_W, 2),
                          ", 95%CI [",
                          round(general.w2$CI_low, 2),
                          " — ",
                          round(general.w2$CI_high, 2),
                          "]")) +
  annotate("text", x = 2, y = 41000, 
           label = paste0("Rank eta squared = ", 
                          round(general.rws2$rank_eta_squared, 2),
                          ", 95%CI [",
                          round(general.rws2$CI_low, 2),
                          " — ",
                          round(general.rws2$CI_high, 2),
                          "]"))

ggarrange(p1a, p1b,
          nrow = 2,
          common.legend = TRUE,
          legend = "bottom")
Histogram of the two conditions (A, B). Vertical dashed lines represente the mean of each condition.

Histogram of the two conditions (A, B). Vertical dashed lines represente the mean of each condition.

Power analysis from t-test, Wilcoxon signed-rank tests, and Linear Mixed Models (LMM)

t-tests

Set parameters for simulation

#Change these numbers to see their effects on statistical power
num_sims.t = 1000 # Number of simulations to run
sample_size.t = 82 # Sample size for each simulation
alpha.t = 0.05 # Significance level

Create simulation-based dataset with defined sample size

samples.t <- map_dfr(seq_len(num_sims.t), ~dat %>%
                     sample_n(sample_size.t) %>% 
                     mutate(sample.t = as.factor(.x)))

samples.t2 <- map_dfr(seq_len(num_sims.t), ~dat2 %>%
                     sample_n(sample_size.t) %>% 
                     mutate(sample.t = as.factor(.x)))

Define function to compute paired t-test

compfun <- function(x, y) {
  comp = (t.test(x, y,
                 alternative = "two.sided", paired = TRUE))
}

Perform t-test on each simulated sample and save p-value and effect size (t) to new dataset (comps)

comps.t <- ddply(samples.t, .(sample.t), summarise,
                 t = round(compfun(A, B)$statistic, 2),
                 p = round(compfun(A, B)$p.value, 3),
                 "Statistical signicance" = ifelse(p <= alpha.t, "Significant", "Non-significant"))

comps.t2 <- ddply(samples.t2, .(sample.t), summarise,
                 t = round(compfun(A, B)$statistic, 2),
                 p = round(compfun(A, B)$p.value, 3),
                 "Statistical signicance" = ifelse(p <= alpha.t, "Significant", "Non-significant"))

Calculate power of analysis (i.e., proportion of simulations where p-value is less than alpha)

power.t <- sum(comps.t$`Statistical signicance` == "Significant") / num_sims.t

power.t2 <- sum(comps.t2$`Statistical signicance` == "Significant") / num_sims.t

Plot histogram of p-values with label for power and sample size

p.t1 <- ggplot(comps.t, aes(x = p, fill = `Statistical signicance`)) +
  scale_fill_hue(direction = -1) +
  geom_histogram(bins = 1/alpha.t, breaks = seq(0, 1, alpha.t)) +
  labs(y = "Count", x = "p-value", title = "Paired t-tests", subtitle = "Modal") +
  annotate("text", x = 0.5, y = sum(comps.t$`Statistical signicance` == "Significant") * 0.9, 
           label = paste0("Power = ", round(power.t, 3), "\nSample size = ", sample_size.t))

p.t2 <- ggplot(comps.t2, aes(x = p, fill = `Statistical signicance`)) +
  scale_fill_hue(direction = -1) +
  geom_histogram(bins = 1/alpha.t, breaks = seq(0, 1, alpha.t)) +
  labs(y = "Count", x = "p-value", title = " ",  subtitle = "Bimodal") +
  annotate("text", x = 0.5, y = sum(comps.t2$`Statistical signicance` == "Significant") * 0.9, 
           label = paste0("Power = ", round(power.t2, 3), "\nSample size = ", sample_size.t))

p.t <- ggarrange(p.t1, p.t2,
                 common.legend = TRUE,
                 legend = "bottom")
p.t

Wilcoxon signed-rank tests

Set parameters for simulation

#Change these numbers to see their effects on statistical power
num_sims.wilcox = 1000 # Number of simulations to run
sample_size.wilcox = 83 # Sample size for each simulation
alpha.wilcox = 0.05 # Significance level

Create simulation-based dataset with defined sample size

samples.wilcox <- map_dfr(seq_len(num_sims.wilcox), ~dat %>%
                            sample_n(sample_size.wilcox) %>%
                            mutate(sample.wilcox = as.factor(.x)))

samples.wilcox2 <- map_dfr(seq_len(num_sims.wilcox), ~dat2 %>%
                            sample_n(sample_size.wilcox) %>%
                            mutate(sample.wilcox = as.factor(.x)))

Define function to compute Wilcoxon signed-rank tests

wilcoxfun <- function(x, y) {
  comp = (wilcox.test(x, y,
                      alternative = "two.sided", paired = TRUE))
}

Perform Wilcoxon signed-rank tests on each simulated dataset and save p-value and effect size (V) to new dataset (comps)

wilcox.comps <- ddply(samples.wilcox, .(sample.wilcox), summarise,
                      V = round(wilcoxfun(A, B)$statistic, 2),
                      p = round(wilcoxfun(A, B)$p.value, 3),
                      "Statistical signicance" = ifelse(p <= alpha.wilcox, "Significant", "Non-significant"))

wilcox.comps2 <- ddply(samples.wilcox2, .(sample.wilcox), summarise,
                      V = round(wilcoxfun(A, B)$statistic, 2),
                      p = round(wilcoxfun(A, B)$p.value, 3),
                      "Statistical signicance" = ifelse(p <= alpha.wilcox, "Significant", "Non-significant"))

Calculate power of analysis (i.e., proportion of simulations where p-value is less than alpha)

power.wilcox <- sum(wilcox.comps$`Statistical signicance` == "Significant") / num_sims.wilcox

power.wilcox2 <- sum(wilcox.comps2$`Statistical signicance` == "Significant") / num_sims.wilcox

Plot histogram of p-values with label for power and sample size

p.wilcox1 <- ggplot(wilcox.comps, aes(x = p, fill = `Statistical signicance`)) +
  scale_fill_hue(direction = -1) +
  geom_histogram(bins = 1/alpha.wilcox, breaks = seq(0, 1, alpha.wilcox)) +
  labs(y = "Count", x = "p-value", title = "Wilcoxon signed-rank tests", subtitle = "Modal") +
  annotate("text", x = 0.5, y = sum(wilcox.comps$`Statistical signicance` == "Significant") * 0.9, 
           label = paste0("Power = ", round(power.wilcox, 3), "\nSample size = ", sample_size.wilcox))

p.wilcox2 <- ggplot(wilcox.comps2, aes(x = p, fill = `Statistical signicance`)) +
  scale_fill_hue(direction = -1) +
  geom_histogram(bins = 1/alpha.wilcox, breaks = seq(0, 1, alpha.wilcox)) +
  labs(y = "Count", x = "p-value", title = " ", subtitle = "Bimodal") +
  annotate("text", x = 0.5, y = sum(wilcox.comps2$`Statistical signicance` == "Significant") * 0.9, 
           label = paste0("Power = ", round(power.wilcox2, 3), "\nSample size = ", sample_size.wilcox))

p.wilcox <- ggarrange(p.wilcox1, p.wilcox2,
                      common.legend = TRUE,
                      legend = "bottom")
p.wilcox

Linear Mixed Models

Set parameters for simulation

#Change these numbers to see their effects on statistical power
num_sims.lmm = 1000 # Number of simulations to run
sample_size.lmm = 14 # Sample size for each simulation (number of participants)
alpha.lmm = 0.05 # Significance level

Create simulation-based dataset of participants with defined sample size

samples.lmm <- map_dfr(seq_len(num_sims.lmm), ~part %>%
                         sample_n(sample_size.lmm) %>%
                         mutate(sample.lmm = as.factor(.x)))

samples.lmm2 <- map_dfr(seq_len(num_sims.lmm), ~part2 %>%
                         sample_n(sample_size.lmm) %>%
                         mutate(sample.lmm = as.factor(.x)))

Full simulation-based dataset with all data per particioant, in long format

samples.lmm.long <- left_join(samples.lmm, dat, by = "Participant",
                              relationship = "many-to-many") %>%
  pivot_longer(cols = A:B, names_to = "Condition", values_to = "Value")

samples.lmm.long2 <- left_join(samples.lmm2, dat2, by = "Participant",
                              relationship = "many-to-many") %>%
  pivot_longer(cols = A:B, names_to = "Condition", values_to = "Value")

Define function to compute lmm

lmmfun <- function(y, x, z) {
  mod = (anova(lmer(y ~ x + (1 | z))))
}

Fit linear mixed model on each simulated sample and write p-value to new dataset (lmm.comps)

lmm.comps <- ddply(samples.lmm.long, .(sample.lmm), summarise,
                   p = round(lmmfun(Value, Condition, Participant)$`Pr(>F)`, 3),
                   "Statistical signicance" = ifelse(p <= alpha.lmm, "Significant", "Non-significant"))

lmm.comps2 <- ddply(samples.lmm.long2, .(sample.lmm), summarise,
                   p = round(lmmfun(Value, Condition, Participant)$`Pr(>F)`, 3),
                   "Statistical signicance" = ifelse(p <= alpha.lmm, "Significant", "Non-significant"))

Calculate power of analysis (i.e., proportion of simulations where p-value is less than alpha)

power.lmm <- sum(lmm.comps$`Statistical signicance` == "Significant") / num_sims.lmm

power.lmm2 <- sum(lmm.comps2$`Statistical signicance` == "Significant") / num_sims.lmm

Plot histogram of p-values with label for power and sample size

p.lmm1 <- ggplot(lmm.comps, aes(x = p, fill = `Statistical signicance`)) +
  scale_fill_hue(direction = -1) +
  geom_histogram(bins = 1/alpha.lmm, breaks = seq(0, 1, alpha.lmm)) +
  labs(y = "Count", x = "p-value", title = "Linear Mixed Models. Call: lmer(Value ~ Condition + (1 | Participant))",
       subtitle = "Modal") +
  annotate("text", x = 0.5, y = sum(lmm.comps$`Statistical signicance` == "Significant") * 0.9, 
           label = paste0("Power = ", round(power.lmm, 3), 
                          "\nSample size = ", sample_size.lmm, 
                          " participants\n(6 paired responses per participant)"))

p.lmm2 <- ggplot(lmm.comps2, aes(x = p, fill = `Statistical signicance`)) +
  scale_fill_hue(direction = -1) +
  geom_histogram(bins = 1/alpha.lmm, breaks = seq(0, 1, alpha.lmm)) +
  labs(y = "Count", x = "p-value", title = " ",
       subtitle = "Bimodal") +
  annotate("text", x = 0.5, y = sum(lmm.comps2$`Statistical signicance` == "Significant") * 0.9, 
           label = paste0("Power = ", round(power.lmm2, 3), 
                          "\nSample size = ", sample_size.lmm, 
                          " participants\n(6 paired responses per participant)"))

p.lmm <- ggarrange(p.lmm1, p.lmm2,
                   common.legend = TRUE,
                   legend = "bottom")
p.lmm

Comparison of different models

ggarrange(p.t, p.wilcox, p.lmm, p.clmm,
          nrow = 4,
          common.legend = TRUE)