Multi-Arm Sample Size Estimation

# Dependencies
library(MAMS) # MAMS Version 2.0.2
library(multiarm) # Multiarm 0.13.0, devtools::install_github("mjg211/multiarm")
library(tidyverse)
library(ggplot2)
library(gt)
library(ggpubr)

# Table settings
gt_theme <- function(data){
  data |>
    tab_options(table.width = pct(100),
                heading.align = "left",
                table.align = "left",
                table.font.size = px(16),
                column_labels.font.size = px(16),
                column_labels.font.weight = "bold",
                heading.title.font.size = px(16),
                heading.title.font.weight = "bold",
                table.border.top.style = "none",
                data_row.padding = px(2),
                column_labels.padding = px(2),
                heading.padding = px(5)
    ) |>
    cols_align(align = "left", columns = everything())
}

MAMS vs. Multiarm Packages

MAMS is primarily used for group-sequential, multiarm, multistage designs, but can be used to estimate fixed sample, single stage designs (current grant). As demonstrated below, when specified correctly, the two packages perform comparably for estimating the sample size for an outcome with a normal distribution. I prefer multiarm as more transparent and flexible for specifying multiple testing correction and power types. Additionally, the package offers functions for calculating Bernoulli and Poisson distributions (not described here). See: https://mjg211.github.io/multiarm/.

Multiple Testing Correction:

Bonferroni multiple testing correction, although more conservative, is typically preferred when outcomes tend to be highly correlated (as will certainly be the case among related psychometric scales).

Power Type:

The disjunctive power (or minimal power) is the probability of finding at least one true intervention effect across all of the outcomes.
The conjunctive power (or maximal power) is the probability of finding a true intervention effect on all outcomes.
The marginal (or individual) power is the probability of finding a true intervention effect on a particular outcome and is calculated separately for each outcome.

“When the clinical objective is to detect an intervention effect for at least one of the outcomes the disjunctive power and marginal power are recommended whereas the conjunctive power is recommended when the clinical objective is to detect an intervention effect on all the outcomes.” See: https://doi.org/10.1186/s12874-019-0754-4

Method	MTC	N	n_site	N_cor¹	n_site_cor¹	Alpha	Power	Delta1	Delta0	SD	PowerType
Table 1: Comparison of MAMS vs. multiarm Packages for Sample Size Estimation
MAMS	Dunnett	276	34	386	48	0.05	0.8	0.5	0.1	1	Marginal
Multiarm	Dunnett	272	34	381	48	0.05	0.8	0.5	0.1	1	Marginal
Multiarm	Bonferroni	284	36	398	50	0.05	0.8	0.5	0.1	1	Marginal
¹ Corrected assuming 40% drop-out rate.

The more conservative Bonferroni correction only modestly increases the necessary sample size. Current study is recruiting for each arm from 2 different sites, as such the n_site calculated by the package(s) should be divided by 2 to derive the actual sample size per site (obviously, the total study sample size N remains the same).

Multiarm Power x N x Delta1 Estimates

Dunnett MTC

ma_est_dunnett <- function() {
  delta1 <- seq(0.2, 0.8, by = 0.1)
  beta <- seq(0.05, 0.95, by = 0.05)
  n <- rep(NA, length(beta) * length(delta1))
  N <- rep(NA, length(beta) * length(delta1))
  beta_res <- rep(NA, length(beta) * length(delta1))
  delta1_res <- rep(NA, length(beta) * length(delta1))

  i <- 1
  for (d in delta1) {
    for (b in beta) {
      d_multi <- des_ss_norm(
        K = 3, # no. experimental treatments
        alpha = 0.05, # significance level
        beta = b, # beta, power = 1-beta
        delta1 = d, # interesting treatment effect on the traditional scale
        delta0 = 0.1, # uninteresting treatment effect on the traditional scale
        sigma = rep(1, 4), # standard deviations of the responses in each arm
        ratio = rep(1, 3), # allocation ratio
        correction = "dunnett", # multiple testing correction
        power  = "marginal", # type of power to control
        integer = TRUE
      ) # return integer sample size
      n[i] <- d_multi$n
      N[i] <- d_multi$N
      beta_res[i] <- b
      delta1_res[i] <- d
      i <- i + 1
    }
  }
  df <- data.frame(
    Delta1 = delta1_res,
    Beta = beta_res,
    Power_pct = (1 - beta_res) * 100,
    n_site = ceiling(n / 2),
    N = ceiling(N),
    n_site_cor = ceiling((n * 1.5) / 2),
    N_cor = ceiling(N * 1.5),
    PowerType = "Marginal",
    Correction = "Dunnett"
  )
  return(df)
}

t_ma_est_dunnett <- ma_est_dunnett()

t_ma_est_dunnett |> 
  filter(Power_pct %in% c(70, 80, 90)) |>
  filter(Delta1 %in% c(0.2, 0.5, 0.8)) |>
  gt() |>
  gt_theme() |>
  tab_header(title = md("**Table 2: Multiarm Power x N x Delta1; MTC: Dunnett**")) |>   
  tab_footnote(footnote = "Corrected assuming 40% drop-out rate.",
               locations = cells_column_labels(columns = c(n_site_cor, N_cor)))

Delta1	Beta	Power_pct	n_site	N	n_site_cor¹	N_cor¹	PowerType	Correction
Table 2: Multiarm Power x N x Delta1; MTC: Dunnett
0.2	0.1	90	280	2236	420	3354	Marginal	Dunnett
0.2	0.2	80	211	1688	317	2532	Marginal	Dunnett
0.2	0.3	70	168	1340	252	2010	Marginal	Dunnett
0.5	0.1	90	45	360	68	540	Marginal	Dunnett
0.5	0.2	80	34	272	51	408	Marginal	Dunnett
0.5	0.3	70	27	216	41	324	Marginal	Dunnett
0.8	0.1	90	18	140	27	210	Marginal	Dunnett
0.8	0.2	80	14	108	21	162	Marginal	Dunnett
0.8	0.3	70	11	84	16	126	Marginal	Dunnett
¹ Corrected assuming 40% drop-out rate.

ggplot(t_ma_est_dunnett) +
  aes(x = Power_pct, y = N, colour = as.factor(Delta1), group = as.factor(Delta1)) +
  geom_line() +
  scale_color_hue(direction = 1) +
  scale_y_continuous(trans = "log10") +
  labs(
    x = "Power%",
    y = "log10(Estimated N_cor)",
    title = "Figure 1: Multi-Arm, Power x N_cor x Delta1",
    subtitle = "MTC: Dunnett",
    color = "Delta1") +
  theme_classic2() +
  theme(plot.title = element_text(face = "bold", size = 14L),
    plot.subtitle = element_text(size = 14L))

Bonferroni MTC

ma_est_bon <- function() {
  delta1 <- seq(0.2, 0.8, by = 0.1)
  beta <- seq(0.05, 0.95, by = 0.05)
  n <- rep(NA, length(beta) * length(delta1))
  N <- rep(NA, length(beta) * length(delta1))
  beta_res <- rep(NA, length(beta) * length(delta1))
  delta1_res <- rep(NA, length(beta) * length(delta1))

  i <- 1
  for (d in delta1) {
    for (b in beta) {
      d_multi <- des_ss_norm(
        K = 3, # no. experimental treatments
        alpha = 0.05, # significance level
        beta = b, # beta, power = 1-beta
        delta1 = d, # interesting treatment effect on the traditional scale
        delta0 = 0.1, # uninteresting treatment effect on the traditional scale
        sigma = rep(1, 4), # standard deviations of the responses in each arm
        ratio = rep(1, 3), # allocation ratio
        correction = "bonferroni", # multiple testing correction
        power  = "marginal", # type of power to control
        integer = TRUE
      ) # return integer sample size
      n[i] <- d_multi$n
      N[i] <- d_multi$N
      beta_res[i] <- b
      delta1_res[i] <- d
      i <- i + 1
    }
  }
  df <- data.frame(
    Delta1 = delta1_res,
    Beta = beta_res,
    Power_pct = (1 - beta_res) * 100,
    n_site = ceiling(n / 2),
    N = ceiling(N),
    n_site_cor = ceiling((n * 1.5) / 2),
    N_cor = ceiling(N * 1.5),
    PowerType = "Marginal",
    Correction = "Bonferroni"
  )
  return(df)
}

t_ma_est_bon <- ma_est_bon()

t_ma_est_bon |> 
  filter(Power_pct %in% c(70, 80, 90)) |>
  filter(Delta1 %in% c(0.2, 0.5, 0.8)) |>
  gt() |>
  gt_theme() |>
  tab_header(title = md("**Table 3: Multiarm Power x N x Delta1; MTC: Bonferroni**")) |>
  tab_footnote(footnote = "Corrected assuming 40% drop-out rate.",
               locations = cells_column_labels(columns = c(n_site_cor, N_cor)))

Delta1	Beta	Power_pct	n_site	N	n_site_cor¹	N_cor¹	PowerType	Correction
Table 3: Multiarm Power x N x Delta1; MTC: Bonferroni
0.2	0.1	90	291	2328	437	3492	Marginal	Bonferroni
0.2	0.2	80	221	1764	331	2646	Marginal	Bonferroni
0.2	0.3	70	176	1408	264	2112	Marginal	Bonferroni
0.5	0.1	90	47	376	71	564	Marginal	Bonferroni
0.5	0.2	80	36	284	54	426	Marginal	Bonferroni
0.5	0.3	70	29	228	43	342	Marginal	Bonferroni
0.8	0.1	90	19	148	28	222	Marginal	Bonferroni
0.8	0.2	80	14	112	21	168	Marginal	Bonferroni
0.8	0.3	70	11	88	17	132	Marginal	Bonferroni
¹ Corrected assuming 40% drop-out rate.

ggplot(t_ma_est_bon) +
  aes(x = Power_pct, y = N_cor, colour = as.factor(Delta1), group = as.factor(Delta1)) +
  geom_line() +
  scale_color_hue(direction = 1) +
  scale_y_continuous(trans = "log10") +
  labs(
    x = "Power%",
    y = "log10(Estimated N_cor)",
    title = "Figure 2 : Multi-Arm, Power x N_cor x Delta1",
    subtitle = "MTC: Bonferroni",
    color = "Delta1"
  ) +
  theme_classic2() + 
    theme(plot.title = element_text(face = "bold", size = 14L),
    plot.subtitle = element_text(size = 14L))

Example Estimation & Methods Description

Supposing the range of treatment effects anticipated is 0.3, 0.4, and 0.5. Calculating the estimated sample size range needed overall and by site for 70% power.

Example Estimation: Marginal Power Type

ma_est_bon_ex <- function() {
  delta1 <- seq(0.3, 0.5, by = 0.1)
  beta <- 0.3
  n <- rep(NA, length(beta) * length(delta1))
  N <- rep(NA, length(beta) * length(delta1))
  beta_res <- rep(NA, length(beta) * length(delta1))
  delta1_res <- rep(NA, length(beta) * length(delta1))
  mtcpv <- rep(NA, length(beta) * length(delta1))

  i <- 1
  for (d in delta1) {
    for (b in beta) {
      d_multi <- des_ss_norm(
        K = 3, # no. experimental treatments
        alpha = 0.05, # significance level
        beta = b, # beta, power = 1-beta
        delta1 = d, # interesting treatment effect on the traditional scale
        delta0 = 0.1, # uninteresting treatment effect on the traditional scale
        sigma = rep(1, 4), # standard deviations of the responses in each arm
        ratio = rep(1, 3), # allocation ratio
        correction = "bonferroni", # multiple testing correction
        power  = "marginal", # type of power to control
        integer = TRUE
      ) # return integer sample size
      n[i] <- d_multi$n
      N[i] <- d_multi$N
      beta_res[i] <- b
      delta1_res[i] <- d
      mtcpv[i] <- d_multi$opchar$P1[1]
      i <- i + 1
    }
  }
  df <- data.frame(
    Delta1 = delta1_res,
    Beta = beta_res,
    Power_pct = (1 - beta_res) * 100,
    n_site = ceiling(n / 2),
    N = ceiling(N),
    n_site_cor = ceiling((n * 1.5) / 2),
    N_cor = ceiling(N * 1.5),
    PowerType = "Marginal",
    Correction = "Bonferroni",
    MTC_pv = round(mtcpv, digits = 3)
  )
  return(df)
}

t_ma_est_bon_ex <- ma_est_bon_ex()


t_ma_est_bon_ex |>
  gt() |>
  gt_theme() |>
  tab_header(title = md("**Table 4: Example Estimation - Marginal Power Type**")) |>
  tab_footnote(footnote = "Corrected assuming 40% drop-out rate.",
               locations = cells_column_labels(columns = c(n_site_cor, N_cor))) |>
  tab_footnote(footnote = "Bonferroni critical p-value for significance.",
               locations = cells_column_labels(columns = c(MTC_pv)))

Delta1	Beta	Power_pct	n_site	N	n_site_cor¹	N_cor¹	PowerType	Correction	MTC_pv²
Table 4: Example Estimation - Marginal Power Type
0.3	0.3	70	79	628	118	942	Marginal	Bonferroni	0.017
0.4	0.3	70	44	352	66	528	Marginal	Bonferroni	0.017
0.5	0.3	70	29	228	43	342	Marginal	Bonferroni	0.017
¹ Corrected assuming 40% drop-out rate.
² Bonferroni critical p-value for significance.

K=3 experimental treatments will be included in the trial. A significance level of alpha=0.05 will be used, in combination with Bonferroni’s correction. The standard deviations of the responses will be assumed to be 1 in each arm. The minimum marginal power will be controlled to level 1−beta=0.7 under each of their respective least favorable configurations. The target allocation to each of the experimental arms will be the same as the control arm with the realized allocation ratios to the experimental arms at 1:1:1. Accounting for a 40% drop-out rate with an anticipating treatment effect range of 0.3-05 detectable with 80% power, the estimated sample size needed is 342-942 overall and 43-118 participants needed per site. A Bonferroni p-value threshold 0.017 will be used to determine significance among arm comparisons.

Example Estimation: Disjunctive Power Type

ma_est_bon_ex_dis <- function() {
  delta1 <- seq(0.3, 0.5, by = 0.1)
  beta <- 0.3
  n <- rep(NA, length(beta) * length(delta1))
  N <- rep(NA, length(beta) * length(delta1))
  beta_res <- rep(NA, length(beta) * length(delta1))
  delta1_res <- rep(NA, length(beta) * length(delta1))
  mtcpv <- rep(NA, length(beta) * length(delta1))

  i <- 1
  for (d in delta1) {
    for (b in beta) {
      d_multi <- des_ss_norm(
        K = 3, # no. experimental treatments
        alpha = 0.05, # significance level
        beta = b, # beta, power = 1-beta
        delta1 = d, # interesting treatment effect on the traditional scale
        delta0 = 0.1, # uninteresting treatment effect on the traditional scale
        sigma = rep(1, 4), # standard deviations of the responses in each arm
        ratio = rep(1, 3), # allocation ratio
        correction = "bonferroni", # multiple testing correction
        power  = "disjunctive", # type of power to control
        integer = TRUE
      ) # return integer sample size
      n[i] <- d_multi$n
      N[i] <- d_multi$N
      beta_res[i] <- b
      delta1_res[i] <- d
      mtcpv[i] <- d_multi$opchar$P1[1]
      i <- i + 1
    }
  }
  df <- data.frame(
    Delta1 = delta1_res,
    Beta = beta_res,
    Power_pct = (1 - beta_res) * 100,
    n_site = ceiling(n / 2),
    N = ceiling(N),
    n_site_cor = ceiling((n * 1.5) / 2),
    N_cor = ceiling(N * 1.5),
    PowerType = "Disjunctive",
    Correction = "Bonferroni",
    MTC_pv = round(mtcpv, digits = 3)
  )
  return(df)
}

t_ma_est_bon_ex_dis <- ma_est_bon_ex_dis()


t_ma_est_bon_ex_dis |>
  gt() |>
  gt_theme() |>
  tab_header(title = md("**Table 5: Example Estimation - Disjunctive Power Type**")) |>
  tab_footnote(footnote = "Corrected assuming 40% drop-out rate.",
               locations = cells_column_labels(columns = c(n_site_cor, N_cor))) |>
  tab_footnote(footnote = "Bonferroni critical p-value for significance.",
               locations = cells_column_labels(columns = c(MTC_pv)))

Delta1	Beta	Power_pct	n_site	N	n_site_cor¹	N_cor¹	PowerType	Correction	MTC_pv²
Table 5: Example Estimation - Disjunctive Power Type
0.3	0.3	70	45	356	67	534	Disjunctive	Bonferroni	0.017
0.4	0.3	70	25	200	38	300	Disjunctive	Bonferroni	0.017
0.5	0.3	70	16	128	24	192	Disjunctive	Bonferroni	0.017
¹ Corrected assuming 40% drop-out rate.
² Bonferroni critical p-value for significance.

Example Estimation: Conjunctive Power Type

ma_est_bon_ex_con <- function() {
  delta1 <- seq(0.3, 0.5, by = 0.1)
  beta <- 0.3
  n <- rep(NA, length(beta) * length(delta1))
  N <- rep(NA, length(beta) * length(delta1))
  beta_res <- rep(NA, length(beta) * length(delta1))
  delta1_res <- rep(NA, length(beta) * length(delta1))
  mtcpv <- rep(NA, length(beta) * length(delta1))

  i <- 1
  for (d in delta1) {
    for (b in beta) {
      d_multi <- des_ss_norm(
        K = 3, # no. experimental treatments
        alpha = 0.05, # significance level
        beta = b, # beta, power = 1-beta
        delta1 = d, # interesting treatment effect on the traditional scale
        delta0 = 0.1, # uninteresting treatment effect on the traditional scale
        sigma = rep(1, 4), # standard deviations of the responses in each arm
        ratio = rep(1, 3), # allocation ratio
        correction = "bonferroni", # multiple testing correction
        power  = "conjunctive", # type of power to control
        integer = TRUE
      ) # return integer sample size
      n[i] <- d_multi$n
      N[i] <- d_multi$N
      beta_res[i] <- b
      delta1_res[i] <- d
      mtcpv[i] <- d_multi$opchar$P1[1]
      i <- i + 1
    }
  }
  df <- data.frame(
    Delta1 = delta1_res,
    Beta = beta_res,
    Power_pct = (1 - beta_res) * 100,
    n_site = ceiling(n / 2),
    N = ceiling(N),
    n_site_cor = ceiling((n * 1.5) / 2),
    N_cor = ceiling(N * 1.5),
    PowerType = "Conjunctive",
    Correction = "Bonferroni",
    MTC_pv = round(mtcpv, digits = 3)
  )
  return(df)
}

t_ma_est_bon_ex_con <- ma_est_bon_ex_con()


t_ma_est_bon_ex_con |>
  gt() |>
  gt_theme() |>
  tab_header(title = md("**Table 6: Example Estimation - Conjunctive Power Type**"),
             subtitle = "Delta1=0.3-0.5; Delta0=0.1; Power=80%, MTC=Bonferroni") |>
  tab_footnote(footnote = "Corrected assuming 40% drop-out rate.",
               locations = cells_column_labels(columns = c(n_site_cor, N_cor))) |>
  tab_footnote(footnote = "Bonferroni critical p-value for significance.",
               locations = cells_column_labels(columns = c(MTC_pv)))

Delta1	Beta	Power_pct	n_site	N	n_site_cor¹	N_cor¹	PowerType	Correction	MTC_pv²
Table 6: Example Estimation - Conjunctive Power Type
Delta1=0.3-0.5; Delta0=0.1; Power=80%, MTC=Bonferroni
0.3	0.3	70	113	904	170	1356	Conjunctive	Bonferroni	0.017
0.4	0.3	70	64	508	96	762	Conjunctive	Bonferroni	0.017
0.5	0.3	70	41	328	62	492	Conjunctive	Bonferroni	0.017
¹ Corrected assuming 40% drop-out rate.
² Bonferroni critical p-value for significance.