library(pacman); p_load(pwr, tidyr, dplyr, ggplot2, faux, boot)
CRITR <- function(n, alpha = .05) {
df <- n - 2; CRITT <- qt(alpha/2, df, lower.tail = F)
CRITR <- sqrt((CRITT^2)/((CRITT^2) + df ))
return(CRITR)}
MeanSigR <- function(N, P = 0.05, R, Reps, seed = 1){
library(pacman); p_load(faux, boot)
set.seed(seed)
RepList <- replicate(n = Reps, rnorm_multi(
n = N, r = R, varnames = c("X", "Y")), simplify = F)
CorList <- (lapply(RepList, corr)); CorListNum <- as.numeric(CorList)
CorListCut <- CorListNum[CorListNum > CRITR(N, P)]
Res <- mean(CorListCut); return(Res)}
MeanSigT <- function(N, P = 0.05, md, sd = 1, R = 0, Reps, seed = 1, sides = 2){
library(pacman); p_load(compute.es, faux)
set.seed(seed)
RepList <- replicate(n = Reps, rnorm_multi(
n = N, r = R, mu = c(0, md), sd = c(1, sd), varnames = c("X", "Y")), simplify = F)
MeanList <- lapply(RepList, t.test)
MeanListSmall <- lapply(MeanList, "[", c("statistic", "estimate")); MeanListNum <- as.numeric(unlist(MeanListSmall))
MeanListD <- tes(MeanListNum, n.1 = N, n.2 = N, verbose = F)$d
MeanListP <- tes(MeanListNum, n.1 = N, n.2 = N, verbose = F)$pval.d
MeanListCut <- MeanListD[MeanListP < P]
Res <- mean(MeanListCut); return(Res)}
This replicates the plots from Yarkoni (2009) because I needed these plots for a discussion of Troller-Renfree et al. (2022).
powlevels <- seq(0.05, 1, 0.01)
Power13 <- pwr.r.test(r = powlevels,
n = 13,
sig.level = 0.05)
Power17 <- pwr.r.test(r = powlevels,
n = 17,
sig.level = 0.05)
Power63 <- pwr.r.test(r = powlevels,
n = 63,
sig.level = 0.05)
Power64 <- pwr.r.test(r = powlevels,
n = 64,
sig.level = 0.05)
Power81 <- pwr.r.test(r = powlevels,
n = 81,
sig.level = 0.05)
Power90 <- pwr.r.test(r = powlevels,
n = 90,
sig.level = 0.05)
Power129 <- pwr.r.test(r = powlevels,
n = 129,
sig.level = 0.05)
PowerFrame <- data.frame(R = powlevels, Power = powlevels, ES13 = Power13$power, ES17 = Power17$power,
#ES63 = Power63$power, #Too similar to N = 64
ES64 = Power64$power, ES81 = Power81$power, ES90 = Power90$power, ES129 = Power129$power)
PowerFrame <- pivot_longer(PowerFrame,
cols = starts_with("ES"),
names_to = "test_type",
values_to = "values")
PowerFrame$legend_label <- gsub("ES", "n = ", PowerFrame$test_type)
PowerFrame$legend_order <- as.numeric(gsub("n =", "", PowerFrame$legend_label))
PowerFrame$legend_label <- factor(PowerFrame$legend_label,
levels = unique(PowerFrame$legend_label[order(PowerFrame$legend_order, decreasing = TRUE)]))
ggplot(PowerFrame, aes(x = R, y = values, group = legend_label, color = legend_label)) +
geom_line(size = 1) +
theme_bw() + labs(x = "Correlation", y = "Power", color = "Sample Size") +
theme(legend.position = c(0.95, 0.15)) + geom_hline(yintercept = 0.80, linetype = "dashed")
O16 <- MeanSigR(N = 6, P = 0.05, R = 0.1, Reps = 10000, seed = 100)
O110 <- MeanSigR(N = 10, P = 0.05, R = 0.1, Reps = 10000, seed = 100)
O120 <- MeanSigR(N = 20, P = 0.05, R = 0.1, Reps = 10000, seed = 100)
O130 <- MeanSigR(N = 30, P = 0.05, R = 0.1, Reps = 10000, seed = 100)
O140 <- MeanSigR(N = 40, P = 0.05, R = 0.1, Reps = 10000, seed = 100)
O150 <- MeanSigR(N = 50, P = 0.05, R = 0.1, Reps = 10000, seed = 100)
O160 <- MeanSigR(N = 60, P = 0.05, R = 0.1, Reps = 10000, seed = 100)
O170 <- MeanSigR(N = 70, P = 0.05, R = 0.1, Reps = 10000, seed = 100)
O180 <- MeanSigR(N = 80, P = 0.05, R = 0.1, Reps = 10000, seed = 100)
O190 <- MeanSigR(N = 90, P = 0.05, R = 0.1, Reps = 10000, seed = 100)
O1100 <- MeanSigR(N = 100, P = 0.05, R = 0.1, Reps = 10000, seed = 100)
O1125 <- MeanSigR(N = 125, P = 0.05, R = 0.1, Reps = 10000, seed = 100)
O1150 <- MeanSigR(N = 150, P = 0.05, R = 0.1, Reps = 10000, seed = 100)
O1175 <- MeanSigR(N = 175, P = 0.05, R = 0.1, Reps = 10000, seed = 100)
O1200 <- MeanSigR(N = 200, P = 0.05, R = 0.1, Reps = 10000, seed = 100)
O36 <- MeanSigR(N = 6, P = 0.05, R = 0.3, Reps = 10000, seed = 100)
O310 <- MeanSigR(N = 10, P = 0.05, R = 0.3, Reps = 10000, seed = 100)
O320 <- MeanSigR(N = 20, P = 0.05, R = 0.3, Reps = 10000, seed = 100)
O330 <- MeanSigR(N = 30, P = 0.05, R = 0.3, Reps = 10000, seed = 100)
O340 <- MeanSigR(N = 40, P = 0.05, R = 0.3, Reps = 10000, seed = 100)
O350 <- MeanSigR(N = 50, P = 0.05, R = 0.3, Reps = 10000, seed = 100)
O360 <- MeanSigR(N = 60, P = 0.05, R = 0.3, Reps = 10000, seed = 100)
O370 <- MeanSigR(N = 70, P = 0.05, R = 0.3, Reps = 10000, seed = 100)
O380 <- MeanSigR(N = 80, P = 0.05, R = 0.3, Reps = 10000, seed = 100)
O390 <- MeanSigR(N = 90, P = 0.05, R = 0.3, Reps = 10000, seed = 100)
O3100 <- MeanSigR(N = 100, P = 0.05, R = 0.3, Reps = 10000, seed = 100)
O3125 <- MeanSigR(N = 125, P = 0.05, R = 0.3, Reps = 10000, seed = 100)
O3150 <- MeanSigR(N = 150, P = 0.05, R = 0.3, Reps = 10000, seed = 100)
O3175 <- MeanSigR(N = 175, P = 0.05, R = 0.3, Reps = 10000, seed = 100)
O3200 <- MeanSigR(N = 200, P = 0.05, R = 0.3, Reps = 10000, seed = 100)
O56 <- MeanSigR(N = 6, P = 0.05, R = 0.5, Reps = 10000, seed = 100)
O510 <- MeanSigR(N = 10, P = 0.05, R = 0.5, Reps = 10000, seed = 100)
O520 <- MeanSigR(N = 20, P = 0.05, R = 0.5, Reps = 10000, seed = 100)
O530 <- MeanSigR(N = 30, P = 0.05, R = 0.5, Reps = 10000, seed = 100)
O540 <- MeanSigR(N = 40, P = 0.05, R = 0.5, Reps = 10000, seed = 100)
O550 <- MeanSigR(N = 50, P = 0.05, R = 0.5, Reps = 10000, seed = 100)
O560 <- MeanSigR(N = 60, P = 0.05, R = 0.5, Reps = 10000, seed = 100)
O570 <- MeanSigR(N = 70, P = 0.05, R = 0.5, Reps = 10000, seed = 100)
O580 <- MeanSigR(N = 80, P = 0.05, R = 0.5, Reps = 10000, seed = 100)
O590 <- MeanSigR(N = 90, P = 0.05, R = 0.5, Reps = 10000, seed = 100)
O5100 <- MeanSigR(N = 100, P = 0.05, R = 0.5, Reps = 10000, seed = 100)
O5125 <- MeanSigR(N = 125, P = 0.05, R = 0.5, Reps = 10000, seed = 100)
O5150 <- MeanSigR(N = 150, P = 0.05, R = 0.5, Reps = 10000, seed = 100)
O5175 <- MeanSigR(N = 175, P = 0.05, R = 0.5, Reps = 10000, seed = 100)
O5200 <- MeanSigR(N = 200, P = 0.05, R = 0.5, Reps = 10000, seed = 100)
O76 <- MeanSigR(N = 6, P = 0.05, R = 0.7, Reps = 10000, seed = 100)
O710 <- MeanSigR(N = 10, P = 0.05, R = 0.7, Reps = 10000, seed = 100)
O720 <- MeanSigR(N = 20, P = 0.05, R = 0.7, Reps = 10000, seed = 100)
O730 <- MeanSigR(N = 30, P = 0.05, R = 0.7, Reps = 10000, seed = 100)
O740 <- MeanSigR(N = 40, P = 0.05, R = 0.7, Reps = 10000, seed = 100)
O750 <- MeanSigR(N = 50, P = 0.05, R = 0.7, Reps = 10000, seed = 100)
O760 <- MeanSigR(N = 60, P = 0.05, R = 0.7, Reps = 10000, seed = 100)
O770 <- MeanSigR(N = 70, P = 0.05, R = 0.7, Reps = 10000, seed = 100)
O780 <- MeanSigR(N = 80, P = 0.05, R = 0.7, Reps = 10000, seed = 100)
O790 <- MeanSigR(N = 90, P = 0.05, R = 0.7, Reps = 10000, seed = 100)
O7100 <- MeanSigR(N = 100, P = 0.05, R = 0.7, Reps = 10000, seed = 100)
O7125 <- MeanSigR(N = 125, P = 0.05, R = 0.7, Reps = 10000, seed = 100)
O7150 <- MeanSigR(N = 150, P = 0.05, R = 0.7, Reps = 10000, seed = 100)
O7175 <- MeanSigR(N = 175, P = 0.05, R = 0.7, Reps = 10000, seed = 100)
O7200 <- MeanSigR(N = 200, P = 0.05, R = 0.7, Reps = 10000, seed = 100)
SS <- c(5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 125, 150, 175, 200)
PowerFrame <- data.frame(SS = SS,
O1 = c(O16, O110, O120, O130, O140, O150, O160, O170, O180, O190, O1100, O1125, O1150, O1175, O1200),
O3 = c(O36, O310, O320, O330, O340, O350, O360, O370, O380, O390, O3100, O3125, O3150, O3175, O3200),
O5 = c(O56, O510, O520, O530, O540, O550, O560, O570, O580, O590, O5100, O5125, O5150, O5175, O5200),
O7 = c(O76, O710, O720, O730, O740, O750, O760, O770, O780, O790, O7100, O7125, O7150, O7175, O7200))
PowerFrame <- pivot_longer(PowerFrame,
cols = starts_with("O"),
names_to = "R_Size",
values_to = "values")
PowerFrame$legend_label <- gsub("O", "r = 0.", PowerFrame$R_Size)
PowerFrame$legend_order <- as.numeric(gsub("n =", "", PowerFrame$legend_label))
## Warning: NAs introduced by coercion
PowerFrame$legend_label <- factor(PowerFrame$legend_label,
levels = unique(PowerFrame$legend_label[order(PowerFrame$legend_order, decreasing = TRUE)]))
ggplot(PowerFrame, aes(x = SS, y = values, group = legend_label, color = legend_label)) +
geom_line(size = 1) +
geom_point() +
theme_bw() + labs(x = "Sample Size", y = "Mean Significant Correlation", color = "True Correlation") +
geom_hline(yintercept = 0.70, linetype = "dashed" ) +
geom_hline(yintercept = 0.50, linetype = "dotdash") +
geom_hline(yintercept = 0.30, linetype = "dotted" ) +
geom_hline(yintercept = 0.10, linetype = "twodash") +
theme(legend.position = c(0.945, 0.91), legend.background = element_blank())
It may also be worthwhile to produce a plot of the likelihood of a false positive under \(H_0\) given a certain number of comparisons.
Comps <- seq(1, 100, 1)
FPC <- 1-0.95^Comps
MCDF <- data.frame("Comparisons" = c(Comps), "FalsePositives" = c(FPC))
ggplot(MCDF, aes(x = Comparisons, y = FalsePositives)) +
geom_line(size = 1) + theme_bw() +
labs(x = "Number of Comparisons", y = "False Positive Probability under H0")
We can also plot mean significant d’s versus sample sizes.
O110 <- MeanSigT(N = 10, P = 0.05, md = 0.1, Reps = 100000, seed = 100)
O120 <- MeanSigT(N = 20, P = 0.05, md = 0.1, Reps = 100000, seed = 100)
O130 <- MeanSigT(N = 30, P = 0.05, md = 0.1, Reps = 100000, seed = 100)
O140 <- MeanSigT(N = 40, P = 0.05, md = 0.1, Reps = 100000, seed = 100)
O150 <- MeanSigT(N = 50, P = 0.05, md = 0.1, Reps = 100000, seed = 100)
O160 <- MeanSigT(N = 60, P = 0.05, md = 0.1, Reps = 100000, seed = 100)
O170 <- MeanSigT(N = 70, P = 0.05, md = 0.1, Reps = 100000, seed = 100)
O180 <- MeanSigT(N = 80, P = 0.05, md = 0.1, Reps = 100000, seed = 100)
O190 <- MeanSigT(N = 90, P = 0.05, md = 0.1, Reps = 100000, seed = 100)
O1100 <- MeanSigT(N = 100, P = 0.05, md = 0.1, Reps = 100000, seed = 100)
O1125 <- MeanSigT(N = 125, P = 0.05, md = 0.1, Reps = 100000, seed = 100)
O1150 <- MeanSigT(N = 150, P = 0.05, md = 0.1, Reps = 100000, seed = 100)
O1175 <- MeanSigT(N = 175, P = 0.05, md = 0.1, Reps = 100000, seed = 100)
O1200 <- MeanSigT(N = 200, P = 0.05, md = 0.1, Reps = 100000, seed = 100)
O310 <- MeanSigT(N = 10, P = 0.05, md = 0.3, Reps = 10000, seed = 100)
O320 <- MeanSigT(N = 20, P = 0.05, md = 0.3, Reps = 10000, seed = 100)
O330 <- MeanSigT(N = 30, P = 0.05, md = 0.3, Reps = 10000, seed = 100)
O340 <- MeanSigT(N = 40, P = 0.05, md = 0.3, Reps = 10000, seed = 100)
O350 <- MeanSigT(N = 50, P = 0.05, md = 0.3, Reps = 10000, seed = 100)
O360 <- MeanSigT(N = 60, P = 0.05, md = 0.3, Reps = 10000, seed = 100)
O370 <- MeanSigT(N = 70, P = 0.05, md = 0.3, Reps = 10000, seed = 100)
O380 <- MeanSigT(N = 80, P = 0.05, md = 0.3, Reps = 10000, seed = 100)
O390 <- MeanSigT(N = 90, P = 0.05, md = 0.3, Reps = 10000, seed = 100)
O3100 <- MeanSigT(N = 100, P = 0.05, md = 0.3, Reps = 10000, seed = 100)
O3125 <- MeanSigT(N = 125, P = 0.05, md = 0.3, Reps = 10000, seed = 100)
O3150 <- MeanSigT(N = 150, P = 0.05, md = 0.3, Reps = 10000, seed = 100)
O3175 <- MeanSigT(N = 175, P = 0.05, md = 0.3, Reps = 10000, seed = 100)
O3200 <- MeanSigT(N = 200, P = 0.05, md = 0.3, Reps = 10000, seed = 100)
O510 <- MeanSigT(N = 10, P = 0.05, md = 0.5, Reps = 10000, seed = 100)
O520 <- MeanSigT(N = 20, P = 0.05, md = 0.5, Reps = 10000, seed = 100)
O530 <- MeanSigT(N = 30, P = 0.05, md = 0.5, Reps = 10000, seed = 100)
O540 <- MeanSigT(N = 40, P = 0.05, md = 0.5, Reps = 10000, seed = 100)
O550 <- MeanSigT(N = 50, P = 0.05, md = 0.5, Reps = 10000, seed = 100)
O560 <- MeanSigT(N = 60, P = 0.05, md = 0.5, Reps = 10000, seed = 100)
O570 <- MeanSigT(N = 70, P = 0.05, md = 0.5, Reps = 10000, seed = 100)
O580 <- MeanSigT(N = 80, P = 0.05, md = 0.5, Reps = 10000, seed = 100)
O590 <- MeanSigT(N = 90, P = 0.05, md = 0.5, Reps = 10000, seed = 100)
O5100 <- MeanSigT(N = 100, P = 0.05, md = 0.5, Reps = 10000, seed = 100)
O5125 <- MeanSigT(N = 125, P = 0.05, md = 0.5, Reps = 10000, seed = 100)
O5150 <- MeanSigT(N = 150, P = 0.05, md = 0.5, Reps = 10000, seed = 100)
O5175 <- MeanSigT(N = 175, P = 0.05, md = 0.5, Reps = 10000, seed = 100)
O5200 <- MeanSigT(N = 200, P = 0.05, md = 0.5, Reps = 10000, seed = 100)
O910 <- MeanSigT(N = 10, P = 0.05, md = 0.9, Reps = 10000, seed = 100)
O920 <- MeanSigT(N = 20, P = 0.05, md = 0.9, Reps = 10000, seed = 100)
O930 <- MeanSigT(N = 30, P = 0.05, md = 0.9, Reps = 10000, seed = 100)
O940 <- MeanSigT(N = 40, P = 0.05, md = 0.9, Reps = 10000, seed = 100)
O950 <- MeanSigT(N = 50, P = 0.05, md = 0.9, Reps = 10000, seed = 100)
O960 <- MeanSigT(N = 60, P = 0.05, md = 0.9, Reps = 10000, seed = 100)
O970 <- MeanSigT(N = 70, P = 0.05, md = 0.9, Reps = 10000, seed = 100)
O980 <- MeanSigT(N = 80, P = 0.05, md = 0.9, Reps = 10000, seed = 100)
O990 <- MeanSigT(N = 90, P = 0.05, md = 0.9, Reps = 10000, seed = 100)
O9100 <- MeanSigT(N = 100, P = 0.05, md = 0.9, Reps = 10000, seed = 100)
O9125 <- MeanSigT(N = 125, P = 0.05, md = 0.9, Reps = 10000, seed = 100)
O9150 <- MeanSigT(N = 150, P = 0.05, md = 0.9, Reps = 10000, seed = 100)
O9175 <- MeanSigT(N = 175, P = 0.05, md = 0.9, Reps = 10000, seed = 100)
O9200 <- MeanSigT(N = 200, P = 0.05, md = 0.9, Reps = 10000, seed = 100)
SS <- c(10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 125, 150, 175, 200)
PowerFrame <- data.frame(SS = SS,
O1 = c(O110, O120, O130, O140, O150, O160, O170, O180, O190, O1100, O1125, O1150, O1175, O1200),
O3 = c(O310, O320, O330, O340, O350, O360, O370, O380, O390, O3100, O3125, O3150, O3175, O3200),
O5 = c(O510, O520, O530, O540, O550, O560, O570, O580, O590, O5100, O5125, O5150, O5175, O5200),
O9 = c(O910, O920, O930, O940, O950, O960, O970, O980, O990, O9100, O9125, O9150, O9175, O9200))
PowerFrame <- pivot_longer(PowerFrame,
cols = starts_with("O"),
names_to = "D_Size",
values_to = "values")
PowerFrame$legend_label <- gsub("O", "d = 0.", PowerFrame$D_Size)
PowerFrame$legend_order <- as.numeric(gsub("n =", "", PowerFrame$legend_label))
## Warning: NAs introduced by coercion
PowerFrame$legend_label <- factor(PowerFrame$legend_label,
levels = unique(PowerFrame$legend_label[order(PowerFrame$legend_order, decreasing = TRUE)]))
ggplot(PowerFrame, aes(x = SS, y = values, group = legend_label, color = legend_label)) +
geom_line(size = 1) +
geom_point() +
theme_bw() + labs(x = "Sample Size", y = "Mean Significant Mean Difference", color = "True Mean Difference") +
geom_hline(yintercept = 0.90, linetype = "dashed" ) +
geom_hline(yintercept = 0.50, linetype = "dotdash") +
geom_hline(yintercept = 0.30, linetype = "dotted" ) +
geom_hline(yintercept = 0.10, linetype = "twodash") +
theme(legend.position = c(0.925, 0.91), legend.background = element_blank())
Mean significant differences eventually converge to below their true level, but effect size exaggeration is extreme even with quite large samples. The reason the d = 0.1 results had to be repeated 100000 times was because they were unstable at only 10000 repetitions because there were too few significant effects.
Yarkoni, Tal. 2009. “Big Correlations in Little Studies: Inflated FMRI Correlations Reflect Low Statistical Power—Commentary on Vul et al. (2009).” Perspectives on Psychological Science 4(3):294–98. doi: 10.1111/j.1745-6924.2009.01127.x.
Troller-Renfree, Sonya V., Molly A. Costanzo, Greg J. Duncan, Katherine Magnuson, Lisa A. Gennetian, Hirokazu Yoshikawa, Sarah Halpern-Meekin, Nathan A. Fox, and Kimberly G. Noble. 2022. “The Impact of a Poverty Reduction Intervention on Infant Brain Activity.” Proceedings of the National Academy of Sciences 119(5). doi: 10.1073/pnas.2115649119.
sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19042)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.1252
## [2] LC_CTYPE=English_United States.1252
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] compute.es_0.2-5 boot_1.3-28 faux_1.1.0 ggplot2_3.3.5
## [5] dplyr_1.0.7 tidyr_1.1.4 pwr_1.3-0 pacman_0.5.1
##
## loaded via a namespace (and not attached):
## [1] highr_0.9 pillar_1.6.4 bslib_0.3.1 compiler_4.1.2
## [5] jquerylib_0.1.4 tools_4.1.2 digest_0.6.28 jsonlite_1.7.2
## [9] evaluate_0.14 lifecycle_1.0.1 tibble_3.1.5 gtable_0.3.0
## [13] pkgconfig_2.0.3 rlang_0.4.12 DBI_1.1.1 yaml_2.2.1
## [17] xfun_0.27 fastmap_1.1.0 withr_2.4.3 stringr_1.4.0
## [21] knitr_1.36 generics_0.1.1 vctrs_0.3.8 sass_0.4.0
## [25] grid_4.1.2 tidyselect_1.1.1 glue_1.4.2 R6_2.5.1
## [29] fansi_0.5.0 rmarkdown_2.11 farver_2.1.0 purrr_0.3.4
## [33] magrittr_2.0.1 scales_1.1.1 ellipsis_0.3.2 htmltools_0.5.2
## [37] assertthat_0.2.1 colorspace_2.0-2 labeling_0.4.2 utf8_1.2.2
## [41] stringi_1.7.5 munsell_0.5.0 crayon_1.4.2