library(pacman); p_load(pwr, tidyr, dplyr, ggplot2, faux, boot)

CRITR <- function(n, alpha = .05) {
  df <- n - 2; CRITT <- qt(alpha/2, df, lower.tail = F)
  CRITR <- sqrt((CRITT^2)/((CRITT^2) + df ))
  return(CRITR)}

MeanSigR <- function(N, P = 0.05, R, Reps, seed = 1){
  library(pacman); p_load(faux, boot)
  set.seed(seed)
  RepList <- replicate(n = Reps, rnorm_multi(
    n = N, r = R, varnames = c("X", "Y")), simplify = F)
  CorList <- (lapply(RepList, corr)); CorListNum <- as.numeric(CorList)
  CorListCut <- CorListNum[CorListNum > CRITR(N, P)]
  Res <- mean(CorListCut); return(Res)}

MeanSigT <- function(N, P = 0.05, md, sd = 1, R = 0, Reps, seed = 1, sides = 2){
  library(pacman); p_load(compute.es, faux)
  set.seed(seed)
  RepList <- replicate(n = Reps, rnorm_multi(
    n = N, r = R, mu = c(0, md), sd = c(1, sd), varnames = c("X", "Y")), simplify = F)
  MeanList <- lapply(RepList, t.test)
  MeanListSmall <- lapply(MeanList, "[", c("statistic", "estimate")); MeanListNum <- as.numeric(unlist(MeanListSmall))
  MeanListD <- tes(MeanListNum, n.1 = N, n.2 = N, verbose = F)$d
  MeanListP <- tes(MeanListNum, n.1 = N, n.2 = N, verbose = F)$pval.d
  MeanListCut <- MeanListD[MeanListP < P]
  Res <- mean(MeanListCut); return(Res)}

This replicates the plots from Yarkoni (2009) because I needed these plots for a discussion of Troller-Renfree et al. (2022).

powlevels <- seq(0.05, 1, 0.01)

Power13 <- pwr.r.test(r = powlevels,
                      n = 13, 
                      sig.level = 0.05)

Power17 <- pwr.r.test(r = powlevels,
                      n = 17, 
                      sig.level = 0.05)

Power63 <- pwr.r.test(r = powlevels,
                      n = 63, 
                      sig.level = 0.05)

Power64 <- pwr.r.test(r = powlevels,
                      n = 64, 
                      sig.level = 0.05)

Power81 <- pwr.r.test(r = powlevels,
                      n = 81, 
                      sig.level = 0.05)

Power90 <- pwr.r.test(r = powlevels,
                      n = 90, 
                      sig.level = 0.05)

Power129 <- pwr.r.test(r = powlevels,
                       n = 129, 
                       sig.level = 0.05)

PowerFrame <- data.frame(R = powlevels, Power = powlevels, ES13 = Power13$power, ES17 = Power17$power, 
                         #ES63 = Power63$power, #Too similar to N = 64
                         ES64 = Power64$power, ES81 = Power81$power, ES90 = Power90$power, ES129 = Power129$power)

PowerFrame <- pivot_longer(PowerFrame,
                           cols = starts_with("ES"),
                           names_to = "test_type",
                           values_to = "values")

PowerFrame$legend_label <- gsub("ES", "n = ", PowerFrame$test_type)
PowerFrame$legend_order <- as.numeric(gsub("n =", "", PowerFrame$legend_label))
PowerFrame$legend_label <- factor(PowerFrame$legend_label,
                                  levels = unique(PowerFrame$legend_label[order(PowerFrame$legend_order, decreasing = TRUE)]))

ggplot(PowerFrame, aes(x = R, y = values, group = legend_label, color = legend_label)) +
  geom_line(size = 1) + 
  theme_bw() + labs(x = "Correlation", y = "Power", color = "Sample Size") + 
  theme(legend.position = c(0.95, 0.15)) + geom_hline(yintercept = 0.80, linetype = "dashed")

O16 <- MeanSigR(N = 6, P = 0.05, R = 0.1, Reps = 10000, seed = 100)
O110 <- MeanSigR(N = 10, P = 0.05, R = 0.1, Reps = 10000, seed = 100)
O120 <- MeanSigR(N = 20, P = 0.05, R = 0.1, Reps = 10000, seed = 100)
O130 <- MeanSigR(N = 30, P = 0.05, R = 0.1, Reps = 10000, seed = 100)
O140 <- MeanSigR(N = 40, P = 0.05, R = 0.1, Reps = 10000, seed = 100)
O150 <- MeanSigR(N = 50, P = 0.05, R = 0.1, Reps = 10000, seed = 100)
O160 <- MeanSigR(N = 60, P = 0.05, R = 0.1, Reps = 10000, seed = 100)
O170 <- MeanSigR(N = 70, P = 0.05, R = 0.1, Reps = 10000, seed = 100)
O180 <- MeanSigR(N = 80, P = 0.05, R = 0.1, Reps = 10000, seed = 100)
O190 <- MeanSigR(N = 90, P = 0.05, R = 0.1, Reps = 10000, seed = 100)
O1100 <- MeanSigR(N = 100, P = 0.05, R = 0.1, Reps = 10000, seed = 100)
O1125 <- MeanSigR(N = 125, P = 0.05, R = 0.1, Reps = 10000, seed = 100)
O1150 <- MeanSigR(N = 150, P = 0.05, R = 0.1, Reps = 10000, seed = 100)
O1175 <- MeanSigR(N = 175, P = 0.05, R = 0.1, Reps = 10000, seed = 100)
O1200 <- MeanSigR(N = 200, P = 0.05, R = 0.1, Reps = 10000, seed = 100)

O36 <- MeanSigR(N = 6, P = 0.05, R = 0.3, Reps = 10000, seed = 100)
O310 <- MeanSigR(N = 10, P = 0.05, R = 0.3, Reps = 10000, seed = 100)
O320 <- MeanSigR(N = 20, P = 0.05, R = 0.3, Reps = 10000, seed = 100)
O330 <- MeanSigR(N = 30, P = 0.05, R = 0.3, Reps = 10000, seed = 100)
O340 <- MeanSigR(N = 40, P = 0.05, R = 0.3, Reps = 10000, seed = 100)
O350 <- MeanSigR(N = 50, P = 0.05, R = 0.3, Reps = 10000, seed = 100)
O360 <- MeanSigR(N = 60, P = 0.05, R = 0.3, Reps = 10000, seed = 100)
O370 <- MeanSigR(N = 70, P = 0.05, R = 0.3, Reps = 10000, seed = 100)
O380 <- MeanSigR(N = 80, P = 0.05, R = 0.3, Reps = 10000, seed = 100)
O390 <- MeanSigR(N = 90, P = 0.05, R = 0.3, Reps = 10000, seed = 100)
O3100 <- MeanSigR(N = 100, P = 0.05, R = 0.3, Reps = 10000, seed = 100)
O3125 <- MeanSigR(N = 125, P = 0.05, R = 0.3, Reps = 10000, seed = 100)
O3150 <- MeanSigR(N = 150, P = 0.05, R = 0.3, Reps = 10000, seed = 100)
O3175 <- MeanSigR(N = 175, P = 0.05, R = 0.3, Reps = 10000, seed = 100)
O3200 <- MeanSigR(N = 200, P = 0.05, R = 0.3, Reps = 10000, seed = 100)

O56 <- MeanSigR(N = 6, P = 0.05, R = 0.5, Reps = 10000, seed = 100)
O510 <- MeanSigR(N = 10, P = 0.05, R = 0.5, Reps = 10000, seed = 100)
O520 <- MeanSigR(N = 20, P = 0.05, R = 0.5, Reps = 10000, seed = 100)
O530 <- MeanSigR(N = 30, P = 0.05, R = 0.5, Reps = 10000, seed = 100)
O540 <- MeanSigR(N = 40, P = 0.05, R = 0.5, Reps = 10000, seed = 100)
O550 <- MeanSigR(N = 50, P = 0.05, R = 0.5, Reps = 10000, seed = 100)
O560 <- MeanSigR(N = 60, P = 0.05, R = 0.5, Reps = 10000, seed = 100)
O570 <- MeanSigR(N = 70, P = 0.05, R = 0.5, Reps = 10000, seed = 100)
O580 <- MeanSigR(N = 80, P = 0.05, R = 0.5, Reps = 10000, seed = 100)
O590 <- MeanSigR(N = 90, P = 0.05, R = 0.5, Reps = 10000, seed = 100)
O5100 <- MeanSigR(N = 100, P = 0.05, R = 0.5, Reps = 10000, seed = 100)
O5125 <- MeanSigR(N = 125, P = 0.05, R = 0.5, Reps = 10000, seed = 100)
O5150 <- MeanSigR(N = 150, P = 0.05, R = 0.5, Reps = 10000, seed = 100)
O5175 <- MeanSigR(N = 175, P = 0.05, R = 0.5, Reps = 10000, seed = 100)
O5200 <- MeanSigR(N = 200, P = 0.05, R = 0.5, Reps = 10000, seed = 100)

O76 <- MeanSigR(N = 6, P = 0.05, R = 0.7, Reps = 10000, seed = 100)
O710 <- MeanSigR(N = 10, P = 0.05, R = 0.7, Reps = 10000, seed = 100)
O720 <- MeanSigR(N = 20, P = 0.05, R = 0.7, Reps = 10000, seed = 100)
O730 <- MeanSigR(N = 30, P = 0.05, R = 0.7, Reps = 10000, seed = 100)
O740 <- MeanSigR(N = 40, P = 0.05, R = 0.7, Reps = 10000, seed = 100)
O750 <- MeanSigR(N = 50, P = 0.05, R = 0.7, Reps = 10000, seed = 100)
O760 <- MeanSigR(N = 60, P = 0.05, R = 0.7, Reps = 10000, seed = 100)
O770 <- MeanSigR(N = 70, P = 0.05, R = 0.7, Reps = 10000, seed = 100)
O780 <- MeanSigR(N = 80, P = 0.05, R = 0.7, Reps = 10000, seed = 100)
O790 <- MeanSigR(N = 90, P = 0.05, R = 0.7, Reps = 10000, seed = 100)
O7100 <- MeanSigR(N = 100, P = 0.05, R = 0.7, Reps = 10000, seed = 100)
O7125 <- MeanSigR(N = 125, P = 0.05, R = 0.7, Reps = 10000, seed = 100)
O7150 <- MeanSigR(N = 150, P = 0.05, R = 0.7, Reps = 10000, seed = 100)
O7175 <- MeanSigR(N = 175, P = 0.05, R = 0.7, Reps = 10000, seed = 100)
O7200 <- MeanSigR(N = 200, P = 0.05, R = 0.7, Reps = 10000, seed = 100)

SS <- c(5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 125, 150, 175, 200)

PowerFrame <- data.frame(SS = SS, 
                         O1 = c(O16, O110, O120, O130, O140, O150, O160, O170, O180, O190, O1100, O1125, O1150, O1175, O1200),
                         O3 = c(O36, O310, O320, O330, O340, O350, O360, O370, O380, O390, O3100, O3125, O3150, O3175, O3200),
                         O5 = c(O56, O510, O520, O530, O540, O550, O560, O570, O580, O590, O5100, O5125, O5150, O5175, O5200),
                         O7 = c(O76, O710, O720, O730, O740, O750, O760, O770, O780, O790, O7100, O7125, O7150, O7175, O7200))

PowerFrame <- pivot_longer(PowerFrame,
                           cols = starts_with("O"),
                           names_to = "R_Size",
                           values_to = "values")

PowerFrame$legend_label <- gsub("O", "r = 0.", PowerFrame$R_Size)
PowerFrame$legend_order <- as.numeric(gsub("n =", "", PowerFrame$legend_label))
## Warning: NAs introduced by coercion
PowerFrame$legend_label <- factor(PowerFrame$legend_label,
                                  levels = unique(PowerFrame$legend_label[order(PowerFrame$legend_order, decreasing = TRUE)]))

ggplot(PowerFrame, aes(x = SS, y = values, group = legend_label, color = legend_label)) + 
  geom_line(size = 1) + 
  geom_point() + 
  theme_bw() + labs(x = "Sample Size", y = "Mean Significant Correlation", color = "True Correlation") + 
  geom_hline(yintercept = 0.70, linetype = "dashed" ) + 
  geom_hline(yintercept = 0.50, linetype = "dotdash") + 
  geom_hline(yintercept = 0.30, linetype = "dotted" ) + 
  geom_hline(yintercept = 0.10, linetype = "twodash") +
  theme(legend.position = c(0.945, 0.91), legend.background = element_blank())

It may also be worthwhile to produce a plot of the likelihood of a false positive under \(H_0\) given a certain number of comparisons.

Comps <- seq(1, 100, 1)
FPC <- 1-0.95^Comps
MCDF <- data.frame("Comparisons" = c(Comps), "FalsePositives" = c(FPC))

ggplot(MCDF, aes(x = Comparisons, y = FalsePositives)) + 
  geom_line(size = 1) + theme_bw() + 
  labs(x = "Number of Comparisons", y = "False Positive Probability under H0")

We can also plot mean significant d’s versus sample sizes.

O110 <- MeanSigT(N = 10, P = 0.05, md = 0.1, Reps = 100000, seed = 100)
O120 <- MeanSigT(N = 20, P = 0.05, md = 0.1, Reps = 100000, seed = 100)
O130 <- MeanSigT(N = 30, P = 0.05, md = 0.1, Reps = 100000, seed = 100)
O140 <- MeanSigT(N = 40, P = 0.05, md = 0.1, Reps = 100000, seed = 100)
O150 <- MeanSigT(N = 50, P = 0.05, md = 0.1, Reps = 100000, seed = 100)
O160 <- MeanSigT(N = 60, P = 0.05, md = 0.1, Reps = 100000, seed = 100)
O170 <- MeanSigT(N = 70, P = 0.05, md = 0.1, Reps = 100000, seed = 100)
O180 <- MeanSigT(N = 80, P = 0.05, md = 0.1, Reps = 100000, seed = 100)
O190 <- MeanSigT(N = 90, P = 0.05, md = 0.1, Reps = 100000, seed = 100)
O1100 <- MeanSigT(N = 100, P = 0.05, md = 0.1, Reps = 100000, seed = 100)
O1125 <- MeanSigT(N = 125, P = 0.05, md = 0.1, Reps = 100000, seed = 100)
O1150 <- MeanSigT(N = 150, P = 0.05, md = 0.1, Reps = 100000, seed = 100)
O1175 <- MeanSigT(N = 175, P = 0.05, md = 0.1, Reps = 100000, seed = 100)
O1200 <- MeanSigT(N = 200, P = 0.05, md = 0.1, Reps = 100000, seed = 100)

O310 <- MeanSigT(N = 10, P = 0.05, md = 0.3, Reps = 10000, seed = 100)
O320 <- MeanSigT(N = 20, P = 0.05, md = 0.3, Reps = 10000, seed = 100)
O330 <- MeanSigT(N = 30, P = 0.05, md = 0.3, Reps = 10000, seed = 100)
O340 <- MeanSigT(N = 40, P = 0.05, md = 0.3, Reps = 10000, seed = 100)
O350 <- MeanSigT(N = 50, P = 0.05, md = 0.3, Reps = 10000, seed = 100)
O360 <- MeanSigT(N = 60, P = 0.05, md = 0.3, Reps = 10000, seed = 100)
O370 <- MeanSigT(N = 70, P = 0.05, md = 0.3, Reps = 10000, seed = 100)
O380 <- MeanSigT(N = 80, P = 0.05, md = 0.3, Reps = 10000, seed = 100)
O390 <- MeanSigT(N = 90, P = 0.05, md = 0.3, Reps = 10000, seed = 100)
O3100 <- MeanSigT(N = 100, P = 0.05, md = 0.3, Reps = 10000, seed = 100)
O3125 <- MeanSigT(N = 125, P = 0.05, md = 0.3, Reps = 10000, seed = 100)
O3150 <- MeanSigT(N = 150, P = 0.05, md = 0.3, Reps = 10000, seed = 100)
O3175 <- MeanSigT(N = 175, P = 0.05, md = 0.3, Reps = 10000, seed = 100)
O3200 <- MeanSigT(N = 200, P = 0.05, md = 0.3, Reps = 10000, seed = 100)

O510 <- MeanSigT(N = 10, P = 0.05, md = 0.5, Reps = 10000, seed = 100)
O520 <- MeanSigT(N = 20, P = 0.05, md = 0.5, Reps = 10000, seed = 100)
O530 <- MeanSigT(N = 30, P = 0.05, md = 0.5, Reps = 10000, seed = 100)
O540 <- MeanSigT(N = 40, P = 0.05, md = 0.5, Reps = 10000, seed = 100)
O550 <- MeanSigT(N = 50, P = 0.05, md = 0.5, Reps = 10000, seed = 100)
O560 <- MeanSigT(N = 60, P = 0.05, md = 0.5, Reps = 10000, seed = 100)
O570 <- MeanSigT(N = 70, P = 0.05, md = 0.5, Reps = 10000, seed = 100)
O580 <- MeanSigT(N = 80, P = 0.05, md = 0.5, Reps = 10000, seed = 100)
O590 <- MeanSigT(N = 90, P = 0.05, md = 0.5, Reps = 10000, seed = 100)
O5100 <- MeanSigT(N = 100, P = 0.05, md = 0.5, Reps = 10000, seed = 100)
O5125 <- MeanSigT(N = 125, P = 0.05, md = 0.5, Reps = 10000, seed = 100)
O5150 <- MeanSigT(N = 150, P = 0.05, md = 0.5, Reps = 10000, seed = 100)
O5175 <- MeanSigT(N = 175, P = 0.05, md = 0.5, Reps = 10000, seed = 100)
O5200 <- MeanSigT(N = 200, P = 0.05, md = 0.5, Reps = 10000, seed = 100)

O910 <- MeanSigT(N = 10, P = 0.05, md = 0.9, Reps = 10000, seed = 100)
O920 <- MeanSigT(N = 20, P = 0.05, md = 0.9, Reps = 10000, seed = 100)
O930 <- MeanSigT(N = 30, P = 0.05, md = 0.9, Reps = 10000, seed = 100)
O940 <- MeanSigT(N = 40, P = 0.05, md = 0.9, Reps = 10000, seed = 100)
O950 <- MeanSigT(N = 50, P = 0.05, md = 0.9, Reps = 10000, seed = 100)
O960 <- MeanSigT(N = 60, P = 0.05, md = 0.9, Reps = 10000, seed = 100)
O970 <- MeanSigT(N = 70, P = 0.05, md = 0.9, Reps = 10000, seed = 100)
O980 <- MeanSigT(N = 80, P = 0.05, md = 0.9, Reps = 10000, seed = 100)
O990 <- MeanSigT(N = 90, P = 0.05, md = 0.9, Reps = 10000, seed = 100)
O9100 <- MeanSigT(N = 100, P = 0.05, md = 0.9, Reps = 10000, seed = 100)
O9125 <- MeanSigT(N = 125, P = 0.05, md = 0.9, Reps = 10000, seed = 100)
O9150 <- MeanSigT(N = 150, P = 0.05, md = 0.9, Reps = 10000, seed = 100)
O9175 <- MeanSigT(N = 175, P = 0.05, md = 0.9, Reps = 10000, seed = 100)
O9200 <- MeanSigT(N = 200, P = 0.05, md = 0.9, Reps = 10000, seed = 100)

SS <- c(10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 125, 150, 175, 200)

PowerFrame <- data.frame(SS = SS, 
                         O1 = c(O110, O120, O130, O140, O150, O160, O170, O180, O190, O1100, O1125, O1150, O1175, O1200),
                         O3 = c(O310, O320, O330, O340, O350, O360, O370, O380, O390, O3100, O3125, O3150, O3175, O3200),
                         O5 = c(O510, O520, O530, O540, O550, O560, O570, O580, O590, O5100, O5125, O5150, O5175, O5200),
                         O9 = c(O910, O920, O930, O940, O950, O960, O970, O980, O990, O9100, O9125, O9150, O9175, O9200))

PowerFrame <- pivot_longer(PowerFrame,
                           cols = starts_with("O"),
                           names_to = "D_Size",
                           values_to = "values")

PowerFrame$legend_label <- gsub("O", "d = 0.", PowerFrame$D_Size)
PowerFrame$legend_order <- as.numeric(gsub("n =", "", PowerFrame$legend_label))
## Warning: NAs introduced by coercion
PowerFrame$legend_label <- factor(PowerFrame$legend_label,
                                  levels = unique(PowerFrame$legend_label[order(PowerFrame$legend_order, decreasing = TRUE)]))

ggplot(PowerFrame, aes(x = SS, y = values, group = legend_label, color = legend_label)) + 
  geom_line(size = 1) + 
  geom_point() + 
  theme_bw() + labs(x = "Sample Size", y = "Mean Significant Mean Difference", color = "True Mean Difference") + 
  geom_hline(yintercept = 0.90, linetype = "dashed" ) + 
  geom_hline(yintercept = 0.50, linetype = "dotdash") + 
  geom_hline(yintercept = 0.30, linetype = "dotted" ) + 
  geom_hline(yintercept = 0.10, linetype = "twodash") +
  theme(legend.position = c(0.925, 0.91), legend.background = element_blank())

Mean significant differences eventually converge to below their true level, but effect size exaggeration is extreme even with quite large samples. The reason the d = 0.1 results had to be repeated 100000 times was because they were unstable at only 10000 repetitions because there were too few significant effects.

References

Yarkoni, Tal. 2009. “Big Correlations in Little Studies: Inflated FMRI Correlations Reflect Low Statistical Power—Commentary on Vul et al. (2009).” Perspectives on Psychological Science 4(3):294–98. doi: 10.1111/j.1745-6924.2009.01127.x.

Troller-Renfree, Sonya V., Molly A. Costanzo, Greg J. Duncan, Katherine Magnuson, Lisa A. Gennetian, Hirokazu Yoshikawa, Sarah Halpern-Meekin, Nathan A. Fox, and Kimberly G. Noble. 2022. “The Impact of a Poverty Reduction Intervention on Infant Brain Activity.” Proceedings of the National Academy of Sciences 119(5). doi: 10.1073/pnas.2115649119.

sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19042)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252 
## [2] LC_CTYPE=English_United States.1252   
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] compute.es_0.2-5 boot_1.3-28      faux_1.1.0       ggplot2_3.3.5   
## [5] dplyr_1.0.7      tidyr_1.1.4      pwr_1.3-0        pacman_0.5.1    
## 
## loaded via a namespace (and not attached):
##  [1] highr_0.9        pillar_1.6.4     bslib_0.3.1      compiler_4.1.2  
##  [5] jquerylib_0.1.4  tools_4.1.2      digest_0.6.28    jsonlite_1.7.2  
##  [9] evaluate_0.14    lifecycle_1.0.1  tibble_3.1.5     gtable_0.3.0    
## [13] pkgconfig_2.0.3  rlang_0.4.12     DBI_1.1.1        yaml_2.2.1      
## [17] xfun_0.27        fastmap_1.1.0    withr_2.4.3      stringr_1.4.0   
## [21] knitr_1.36       generics_0.1.1   vctrs_0.3.8      sass_0.4.0      
## [25] grid_4.1.2       tidyselect_1.1.1 glue_1.4.2       R6_2.5.1        
## [29] fansi_0.5.0      rmarkdown_2.11   farver_2.1.0     purrr_0.3.4     
## [33] magrittr_2.0.1   scales_1.1.1     ellipsis_0.3.2   htmltools_0.5.2 
## [37] assertthat_0.2.1 colorspace_2.0-2 labeling_0.4.2   utf8_1.2.2      
## [41] stringi_1.7.5    munsell_0.5.0    crayon_1.4.2