# Location of the tidy CSV, built with here().
TIDY_DATA_PATH <- here("data/analysis_1/tidy_krvaven.csv")
tidy_data <- read_csv(TIDY_DATA_PATH)

# Wide table keyed by `id`: one effect-size column per source table
# (original study, replication, and each meta-analytic estimator), plus the
# heterogeneity columns taken from the "original_MA" rows.
wide_df_full <- tidy_data %>%
  select(table_name, id, n, es) %>%
  pivot_wider(names_from = "table_name", values_from = c(n, es)) %>%
  select(id, es_original_studies, es_replication_studies, es_RE_MA,
         "es_PET-PEESE_MA", es_PSM_MA, es_TF_MA) %>%
  # Join on `id` explicitly instead of relying on a natural join over
  # whichever columns the two tables happen to share.
  left_join(
    tidy_data %>%
      filter(table_name == "original_MA") %>%
      select(id, tau),
    by = "id"
  ) %>%
  # NOTE(review): the CSV column `tau` is treated as tau^2 here (it is copied
  # into `tau2` and then square-rooted) -- confirm against the data source.
  mutate(tau2 = tau,
         tau = sqrt(tau2),
         # The small offset keeps log() finite when tau2 is 0.
         log_tau2 = log(tau2 + .0001))
# Hand every column except `id` to make_corr_plot().
make_corr_plot(select(wide_df_full, -id))
There is some evidence that a replication effect size is larger when the corresponding meta-analysis is heterogeneous.
One effect - Srull & Wyer (1979) - has a massive original effect size (~3) that strongly influences the magnitude of the correlations with the original es. Here’s what the correlations look like when you exclude this point.
# Drop the row with id 14, then hand every column except `id` to
# make_corr_plot().
make_corr_plot(select(filter(wide_df_full, id != 14), -id))
# Wide data frame keyed by `id` holding n and es for the original study, the
# random-effects meta-analysis (RE_MA) and the replication, the confidence
# limits of each es, and the heterogeneity columns from "original_MA".
#
# Every join names its key (`by = "id"`) and renames the CI columns inside
# select(), so a later join can never pick up a leftover `ci_lower`/`ci_upper`
# column as an accidental join key.
wide_df <- tidy_data %>%
  select(table_name, id, n, es) %>%
  pivot_wider(names_from = "table_name",
              values_from = c(n, es)) %>%
  # The RE_MA sample size is taken from the "original_MA" rows.
  mutate(n_RE_MA = n_original_MA) %>%
  select(id, n_replication_studies, n_original_studies, n_RE_MA,
         es_replication_studies, es_original_studies, es_RE_MA) %>%
  left_join(
    tidy_data %>%
      filter(table_name == "replication_studies") %>%
      select(id,
             ci_lower_es_replication = ci_lower,
             ci_upper_es_replication = ci_upper),
    by = "id"
  ) %>%
  left_join(
    tidy_data %>%
      filter(table_name == "RE_MA") %>%
      select(id,
             ci_lower_es_RE_MA = ci_lower,
             ci_upper_es_RE_MA = ci_upper),
    by = "id"
  ) %>%
  left_join(
    tidy_data %>%
      filter(table_name == "original_studies") %>%
      select(id,
             ci_lower_es_original_study = ci_lower,
             ci_upper_es_original_study = ci_upper),
    by = "id"
  ) %>%
  left_join(
    tidy_data %>%
      filter(table_name == "original_MA") %>%
      select(id, tau),
    by = "id"
  ) %>%
  # NOTE(review): the CSV column `tau` is treated as tau^2 here (it is copied
  # into `tau2` and then square-rooted) -- confirm against the data source.
  mutate(tau2 = tau,
         tau = sqrt(tau2),
         # The small offset keeps log() finite when tau2 is 0.
         log_tau2 = log(tau2 + .0001))
# Correlation between the random-effects meta-analytic estimate and the
# replication estimate; its `estimate` is printed on the plot below.
rep_ma <- cor.test(
  wide_df$es_RE_MA,
  wide_df$es_replication_studies
)

# Replication es (y) against RE meta-analytic es (x), with CI bars on both
# axes, the identity line (dashed) and a linear fit.
wide_df %>%
  ggplot(aes(x = es_RE_MA, y = es_replication_studies)) +
  geom_errorbarh(
    aes(xmin = ci_lower_es_RE_MA, xmax = ci_upper_es_RE_MA, height = 0)
  ) +
  geom_pointrange(
    aes(ymin = ci_lower_es_replication, ymax = ci_upper_es_replication)
  ) +
  geom_abline(slope = 1, intercept = 0, linetype = 2) +
  geom_smooth(method = "lm", alpha = .2) +
  annotate(
    "text",
    x = .9, y = -.1,
    label = paste0("r = ", round(rep_ma$estimate, 2)),
    size = 8, color = "red"
  ) +
  ylim(-.3, 1.2) +
  labs(
    title = "Re-Analysis of Kvarven et al.",
    x = "Meta-Analytic Random Effect Effect Size",
    y = "Multi-Lab Replication Effect Size"
  ) +
  theme_classic(base_size = 15)
# Correlation between the original-study estimate and the replication
# estimate; its `estimate` is printed on the plot below.
original_ma <- cor.test(
  wide_df$es_original_studies,
  wide_df$es_replication_studies
)

# Replication es (y) against original-study es (x), with CI bars on both
# axes, the identity line (dashed) and a linear fit.
ggplot(wide_df, aes(x = es_original_studies, y = es_replication_studies)) +
  geom_pointrange(
    aes(ymin = ci_lower_es_replication, ymax = ci_upper_es_replication)
  ) +
  geom_errorbarh(
    aes(xmin = ci_lower_es_original_study,
        xmax = ci_upper_es_original_study,
        height = 0)
  ) +
  geom_abline(slope = 1, intercept = 0, linetype = 2) +
  geom_smooth(method = "lm", alpha = .2) +
  annotate(
    "text",
    x = .9, y = -.1,
    label = paste0("r = ", round(original_ma$estimate, 2)),
    size = 8, color = "red"
  ) +
  ylim(-.3, 1.2) +
  labs(
    title = "Re-Analysis of Kvarven et al.",
    x = "Original Study Effect Size",
    y = "Multi-Lab Replication Effect Size"
  ) +
  theme_classic(base_size = 15)
# Copy of wide_df without the row whose id is 14.
wide_df_excl <- filter(wide_df, id != 14)
# Correlation between the RE meta-analytic estimate and the replication
# estimate, computed on the data without id 14.
rep_ma_excl <- cor.test(
  wide_df_excl$es_RE_MA,
  wide_df_excl$es_replication_studies
)

# Replication es (y) against RE meta-analytic es (x) for wide_df_excl, with
# CI bars on both axes, the identity line (dashed) and a linear fit.
wide_df_excl %>%
  ggplot(aes(x = es_RE_MA, y = es_replication_studies)) +
  geom_errorbarh(
    aes(xmin = ci_lower_es_RE_MA, xmax = ci_upper_es_RE_MA, height = 0)
  ) +
  geom_pointrange(
    aes(ymin = ci_lower_es_replication, ymax = ci_upper_es_replication)
  ) +
  geom_abline(slope = 1, intercept = 0, linetype = 2) +
  geom_smooth(method = "lm", alpha = .2) +
  annotate(
    "text",
    x = .9, y = -.1,
    label = paste0("r = ", round(rep_ma_excl$estimate, 2)),
    size = 8, color = "red"
  ) +
  ylim(-.3, 1.2) +
  labs(
    title = "Re-Analysis of Kvarven et al.",
    x = "Meta-Analytic Random Effect Effect Size",
    y = "Multi-Lab Replication Effect Size"
  ) +
  theme_classic(base_size = 15)
# Correlation between the original-study estimate and the replication
# estimate, computed on the data without id 14.
original_ma_excl <- cor.test(
  wide_df_excl$es_original_studies,
  wide_df_excl$es_replication_studies
)

# Replication es (y) against original-study es (x) for wide_df_excl, with
# CI bars on both axes, the identity line (dashed) and a linear fit.
ggplot(wide_df_excl,
       aes(x = es_original_studies, y = es_replication_studies)) +
  geom_pointrange(
    aes(ymin = ci_lower_es_replication, ymax = ci_upper_es_replication)
  ) +
  geom_errorbarh(
    aes(xmin = ci_lower_es_original_study,
        xmax = ci_upper_es_original_study,
        height = 0)
  ) +
  geom_abline(slope = 1, intercept = 0, linetype = 2) +
  geom_smooth(method = "lm", alpha = .2) +
  annotate(
    "text",
    x = .9, y = -.1,
    label = paste0("r = ", round(original_ma_excl$estimate, 2)),
    size = 8, color = "red"
  ) +
  ylim(-.3, 1.2) +
  labs(
    title = "Re-Analysis of Kvarven et al.",
    x = "Original Study Effect Size",
    y = "Multi-Lab Replication Effect Size"
  ) +
  theme_classic(base_size = 15)
# Replication effect size (with its CI) against heterogeneity.
# The x aesthetic is `log_tau2` so the plotted variable matches the
# "log Tau2" axis label and the `log_tau2` predictor used in the regressions;
# previously the raw `tau2` column was plotted under that label.
wide_df %>%
  ggplot(aes(y = es_replication_studies,
             x = log_tau2)) +
  geom_pointrange(aes(ymin = ci_lower_es_replication,
                      ymax = ci_upper_es_replication)) +
  ggtitle("Re-Analysis of Kvarven et al.") +
  geom_smooth(method = "lm", alpha = .2) +
  ylab("Multi-Lab Replication Effect Size") +
  xlab("log Tau2") +
  theme_classic(base_size = 15)
Comparing the replication estimate minus the MA estimate vs. the replication estimate minus the original estimate. Note: this is just the simple difference, but we probably want a model that takes into account the sample sizes for each effect. I think they did some version of this in the paper, but I can’t figure out what they did.
# Per-id differences between the replication estimate and (a) the original
# study estimate, (b) the RE meta-analytic estimate -- signed and absolute --
# kept alongside the tau columns.
es_diffs <- wide_df %>%
  mutate(
    dif_original = es_replication_studies - es_original_studies,
    dif_ma = es_replication_studies - es_RE_MA,
    abs_dif_original = abs(dif_original),
    abs_dif_ma = abs(dif_ma)
  ) %>%
  select(id,
         abs_dif_original, abs_dif_ma, dif_original, dif_ma,
         contains("tau"))
# Hand every es_diffs column except `id` to make_corr_plot().
make_corr_plot(select(es_diffs, -id))
# Long format (one row per id and measure), then drop the tau columns and the
# absolute differences so only the remaining measures are kept.
es_diffs_long <- pivot_longer(
  es_diffs,
  cols = -id,
  names_to = "measure",
  values_to = "es_dif"
) %>%
  filter(!measure %in% c("tau", "log_tau2", "tau2",
                         "abs_dif_ma", "abs_dif_original"))
# Mean difference per measure with a normal-approximation 95% interval
# (mean +/- 1.96 * sd / sqrt(n)).
mean_diffs <- es_diffs_long %>%
  group_by(measure) %>%
  summarize(mean = mean(es_dif),
            sd = sd(es_dif),
            n = n()) %>%
  mutate(
    ci_95_range = 1.96 * sd / sqrt(n),
    ci_lower = mean - ci_95_range,
    ci_upper = mean + ci_95_range,
    # Fixed y positions for the summary rows; the length-2 vector means this
    # expects exactly two measures.
    y = c(2.5, 2.4)
  )
# Density of the differences per measure, with each measure's mean and
# 95% interval from mean_diffs drawn at its fixed `y` position.
es_diffs_long %>%
  ggplot() +
  geom_density(aes(x = es_dif, fill = measure), alpha = .5) +
  geom_point(
    data = mean_diffs,
    aes(x = mean, y = y, color = measure),
    size = 3
  ) +
  geom_errorbarh(
    data = mean_diffs,
    aes(xmin = ci_lower, xmax = ci_upper, y = y, color = measure),
    height = 0, size = 1
  ) +
  labs(x = "Difference from Replication Estimate") +
  theme_classic(base_size = 15)
# Per-id differences between the replication estimate and (a) the original
# study estimate, (b) the RE meta-analytic estimate -- signed and absolute --
# computed on wide_df_excl and kept alongside the tau columns.
es_diffs_excl <- wide_df_excl %>%
  mutate(
    dif_original = es_replication_studies - es_original_studies,
    dif_ma = es_replication_studies - es_RE_MA,
    abs_dif_original = abs(dif_original),
    abs_dif_ma = abs(dif_ma)
  ) %>%
  select(id,
         abs_dif_original, abs_dif_ma, dif_original, dif_ma,
         contains("tau"))
# Hand every es_diffs_excl column except `id` to make_corr_plot().
make_corr_plot(select(es_diffs_excl, -id))
# Long format (one row per id and measure), then drop the tau columns and the
# absolute differences so only the remaining measures are kept.
es_diffs_excl_long <- pivot_longer(
  es_diffs_excl,
  cols = -id,
  names_to = "measure",
  values_to = "es_dif"
) %>%
  filter(!measure %in% c("tau", "log_tau2", "tau2",
                         "abs_dif_ma", "abs_dif_original"))
# Mean difference per measure with a normal-approximation 95% interval
# (mean +/- 1.96 * sd / sqrt(n)), for the data without id 14.
mean_diffs_excl <- es_diffs_excl_long %>%
  group_by(measure) %>%
  summarize(mean = mean(es_dif),
            sd = sd(es_dif),
            n = n()) %>%
  mutate(
    ci_95_range = 1.96 * sd / sqrt(n),
    ci_lower = mean - ci_95_range,
    ci_upper = mean + ci_95_range,
    # Fixed y positions for the summary rows; the length-2 vector means this
    # expects exactly two measures.
    y = c(2.5, 2.4)
  )
# Density of the differences per measure (id 14 excluded), with each
# measure's mean and 95% interval from mean_diffs_excl drawn at its fixed
# `y` position.
es_diffs_excl_long %>%
  ggplot() +
  geom_density(aes(x = es_dif, fill = measure), alpha = .5) +
  geom_point(
    data = mean_diffs_excl,
    aes(x = mean, y = y, color = measure),
    size = 3
  ) +
  geom_errorbarh(
    data = mean_diffs_excl,
    aes(xmin = ci_lower, xmax = ci_upper, y = y, color = measure),
    height = 0, size = 1
  ) +
  labs(x = "Difference from Replication Estimate") +
  theme_classic(base_size = 15)
# Replication es regressed on the RE meta-analytic es (all rows).
summary(lm(es_replication_studies ~ es_RE_MA, data = wide_df))
##
## Call:
## lm(formula = es_replication_studies ~ es_RE_MA, data = wide_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.26987 -0.07912 0.00618 0.07385 0.35460
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.1781 0.1006 -1.770 0.1001
## es_RE_MA 0.7960 0.2144 3.713 0.0026 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1759 on 13 degrees of freedom
## Multiple R-squared: 0.5147, Adjusted R-squared: 0.4774
## F-statistic: 13.79 on 1 and 13 DF, p-value: 0.002604
# Replication es regressed on the original-study es (all rows).
summary(lm(es_replication_studies ~ es_original_studies, data = wide_df))
##
## Call:
## lm(formula = es_replication_studies ~ es_original_studies, data = wide_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.23211 -0.14018 -0.09580 0.07094 0.61782
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.12202 0.09330 1.308 0.214
## es_original_studies 0.04754 0.09664 0.492 0.631
##
## Residual standard error: 0.2502 on 13 degrees of freedom
## Multiple R-squared: 0.01828, Adjusted R-squared: -0.05724
## F-statistic: 0.242 on 1 and 13 DF, p-value: 0.631
# Replication es regressed on log_tau2 (all rows).
summary(lm(es_replication_studies ~ log_tau2, data = wide_df))
##
## Call:
## lm(formula = es_replication_studies ~ log_tau2, data = wide_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.29852 -0.11801 -0.06239 0.04632 0.48927
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.32909 0.11259 2.923 0.0119 *
## log_tau2 0.05415 0.02998 1.806 0.0941 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2258 on 13 degrees of freedom
## Multiple R-squared: 0.2006, Adjusted R-squared: 0.1391
## F-statistic: 3.262 on 1 and 13 DF, p-value: 0.09411
# Replication es regressed on RE meta-analytic es and original-study es
# (all rows).
summary(
  lm(es_replication_studies ~ es_RE_MA + es_original_studies,
     data = wide_df)
)
##
## Call:
## lm(formula = es_replication_studies ~ es_RE_MA + es_original_studies,
## data = wide_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.26425 -0.09070 0.00714 0.07577 0.35026
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.18381 0.11064 -1.661 0.1225
## es_RE_MA 0.79081 0.22525 3.511 0.0043 **
## es_original_studies 0.01136 0.07139 0.159 0.8762
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1829 on 12 degrees of freedom
## Multiple R-squared: 0.5157, Adjusted R-squared: 0.435
## F-statistic: 6.389 on 2 and 12 DF, p-value: 0.0129
# Replication es regressed on RE meta-analytic es and log_tau2 (all rows).
summary(
  lm(es_replication_studies ~ es_RE_MA + log_tau2,
     data = wide_df)
)
##
## Call:
## lm(formula = es_replication_studies ~ es_RE_MA + log_tau2, data = wide_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.20583 -0.08834 -0.04040 0.05963 0.33438
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.01012 0.11091 -0.091 0.92880
## es_RE_MA 0.76390 0.18389 4.154 0.00134 **
## log_tau2 0.04810 0.02004 2.400 0.03351 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1505 on 12 degrees of freedom
## Multiple R-squared: 0.6721, Adjusted R-squared: 0.6175
## F-statistic: 12.3 on 2 and 12 DF, p-value: 0.001243
# Replication es regressed on RE meta-analytic es, original-study es and
# log_tau2 (all rows).
summary(
  lm(es_replication_studies ~ es_RE_MA + es_original_studies + log_tau2,
     data = wide_df)
)
##
## Call:
## lm(formula = es_replication_studies ~ es_RE_MA + es_original_studies +
## log_tau2, data = wide_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.20241 -0.08912 -0.03926 0.06078 0.33171
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.013965 0.120449 -0.116 0.90979
## es_RE_MA 0.760722 0.193914 3.923 0.00238 **
## es_original_studies 0.007092 0.061347 0.116 0.91005
## log_tau2 0.048023 0.020927 2.295 0.04242 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1571 on 11 degrees of freedom
## Multiple R-squared: 0.6725, Adjusted R-squared: 0.5832
## F-statistic: 7.529 on 3 and 11 DF, p-value: 0.005171
# Three-predictor model on all rows, weighted by log(n_original_studies).
# NOTE(review): this is the only model that uses `tau2` rather than
# `log_tau2`; the weighted model fitted to wide_df_excl uses `log_tau2`.
# Confirm which predictor is intended before comparing the two fits.
summary(
  lm(es_replication_studies ~ es_RE_MA + es_original_studies + tau2,
     data = wide_df, weights = log(n_original_studies))
)
##
## Call:
## lm(formula = es_replication_studies ~ es_RE_MA + es_original_studies +
## tau2, data = wide_df, weights = log(n_original_studies))
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -0.30477 -0.24842 -0.07912 0.10910 0.87365
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.16177 0.09137 -1.770 0.1043
## es_RE_MA 0.52557 0.21750 2.416 0.0342 *
## es_original_studies 0.03755 0.06859 0.548 0.5950
## tau2 0.75686 0.30341 2.494 0.0298 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3494 on 11 degrees of freedom
## Multiple R-squared: 0.7426, Adjusted R-squared: 0.6724
## F-statistic: 10.58 on 3 and 11 DF, p-value: 0.001431
# Replication es regressed on the RE meta-analytic es (id 14 excluded).
summary(lm(es_replication_studies ~ es_RE_MA, data = wide_df_excl))
##
## Call:
## lm(formula = es_replication_studies ~ es_RE_MA, data = wide_df_excl)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.27547 -0.06883 0.00221 0.07031 0.34856
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.1691 0.1048 -1.614 0.13258
## es_RE_MA 0.7906 0.2208 3.581 0.00378 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.181 on 12 degrees of freedom
## Multiple R-squared: 0.5166, Adjusted R-squared: 0.4763
## F-statistic: 12.82 on 1 and 12 DF, p-value: 0.003776
# Replication es regressed on the original-study es (id 14 excluded).
summary(lm(es_replication_studies ~ es_original_studies, data = wide_df_excl))
##
## Call:
## lm(formula = es_replication_studies ~ es_original_studies, data = wide_df_excl)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.24468 -0.15370 0.02022 0.05209 0.35259
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.1770 0.1119 -1.581 0.13979
## es_original_studies 0.6415 0.1888 3.398 0.00529 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1858 on 12 degrees of freedom
## Multiple R-squared: 0.4904, Adjusted R-squared: 0.4479
## F-statistic: 11.55 on 1 and 12 DF, p-value: 0.00529
# Replication es regressed on log_tau2 (id 14 excluded).
summary(lm(es_replication_studies ~ log_tau2, data = wide_df_excl))
##
## Call:
## lm(formula = es_replication_studies ~ log_tau2, data = wide_df_excl)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.30632 -0.12315 -0.06450 0.06408 0.48184
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.33633 0.11679 2.880 0.0138 *
## log_tau2 0.05385 0.03088 1.744 0.1067
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2325 on 12 degrees of freedom
## Multiple R-squared: 0.2022, Adjusted R-squared: 0.1357
## F-statistic: 3.042 on 1 and 12 DF, p-value: 0.1067
# Replication es regressed on RE meta-analytic es and original-study es
# (id 14 excluded).
summary(
  lm(es_replication_studies ~ es_RE_MA + es_original_studies,
     data = wide_df_excl)
)
##
## Call:
## lm(formula = es_replication_studies ~ es_RE_MA + es_original_studies,
## data = wide_df_excl)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.23289 -0.10847 0.01125 0.11663 0.19295
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.2873 0.1033 -2.782 0.0179 *
## es_RE_MA 0.5429 0.2173 2.498 0.0296 *
## es_original_studies 0.4188 0.1810 2.314 0.0410 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.155 on 11 degrees of freedom
## Multiple R-squared: 0.6749, Adjusted R-squared: 0.6157
## F-statistic: 11.42 on 2 and 11 DF, p-value: 0.002072
# Replication es regressed on RE meta-analytic es and log_tau2
# (id 14 excluded).
summary(
  lm(es_replication_studies ~ es_RE_MA + log_tau2,
     data = wide_df_excl)
)
##
## Call:
## lm(formula = es_replication_studies ~ es_RE_MA + log_tau2, data = wide_df_excl)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.21135 -0.09022 -0.04366 0.05342 0.32877
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.002364 0.114832 -0.021 0.98394
## es_RE_MA 0.758914 0.189326 4.008 0.00206 **
## log_tau2 0.047909 0.020612 2.324 0.04027 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1548 on 11 degrees of freedom
## Multiple R-squared: 0.6758, Adjusted R-squared: 0.6168
## F-statistic: 11.46 on 2 and 11 DF, p-value: 0.00204
# Replication es regressed on RE meta-analytic es, original-study es and
# log_tau2 (id 14 excluded).
summary(
  lm(es_replication_studies ~ es_RE_MA + es_original_studies + log_tau2,
     data = wide_df_excl)
)
##
## Call:
## lm(formula = es_replication_studies ~ es_RE_MA + es_original_studies +
## log_tau2, data = wide_df_excl)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.20435 -0.08035 0.01668 0.07784 0.19309
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.12621 0.10814 -1.167 0.2703
## es_RE_MA 0.54444 0.18032 3.019 0.0129 *
## es_original_studies 0.36898 0.15156 2.435 0.0352 *
## log_tau2 0.04225 0.01729 2.444 0.0346 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1286 on 10 degrees of freedom
## Multiple R-squared: 0.7964, Adjusted R-squared: 0.7354
## F-statistic: 13.04 on 3 and 10 DF, p-value: 0.000862
# Three-predictor model (id 14 excluded), weighted by
# log(n_original_studies).
summary(
  lm(es_replication_studies ~ es_RE_MA + es_original_studies + log_tau2,
     data = wide_df_excl, weights = log(n_original_studies))
)
##
## Call:
## lm(formula = es_replication_studies ~ es_RE_MA + es_original_studies +
## log_tau2, data = wide_df_excl, weights = log(n_original_studies))
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -0.43037 -0.17267 0.00641 0.15716 0.46397
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.10986 0.11233 -0.978 0.3512
## es_RE_MA 0.50524 0.17330 2.915 0.0154 *
## es_original_studies 0.40796 0.15414 2.647 0.0245 *
## log_tau2 0.04466 0.01850 2.414 0.0364 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2874 on 10 degrees of freedom
## Multiple R-squared: 0.8388, Adjusted R-squared: 0.7904
## F-statistic: 17.34 on 3 and 10 DF, p-value: 0.0002742