DATA_PATH <- here("data/processed/syntactic_bootstrapping_tidy_data.csv")
ma_data <- read.csv(DATA_PATH) %>% mutate(row_id = 1:n())
There are 53 effect sizes from 15 unique papers.
get_MA_params <- function(moderator, df) {
this_data <- df
n = nrow(this_data)
if (moderator == TRUE){
model <- rma.mv(d_calc ~ log(mean_age), V = d_var_calc,
random = ~ 1 | short_cite/same_infant/x_1,
method = "REML",
data = this_data)
this_moderator_estimate <- model$b[2]
this_moderator_estimate.cil <- model$ci.lb[2]
this_moderator_estimate.cih <- model$ci.ub[2]
this_moderator_z <- model$zval[2]
this_moderator_p <- model$pval[2]
} else{
model <- rma.mv(d_calc, V = d_var_calc,
random = ~ 1 | short_cite/same_infant/x_1,
method = "REML",
data = this_data)
this_moderator_estimate <- NA
this_moderator_estimate.cil <- NA
this_moderator_estimate.cih <- NA
this_moderator_z <- NA
this_moderator_p <- NA
}
params <- data.frame(this_moderator = moderator,
n = n,
estimate = model$b[1],
estimate.cil = model$ci.lb[1],
estimate.cih = model$ci.ub[1],
z = model$zval[1],
p = model$pval[1],
mod_estimate = this_moderator_estimate,
mod_estimate.cil = this_moderator_estimate.cil,
mod_estimate.cih = this_moderator_estimate.cih,
moderator_z = this_moderator_z,
moderator_p = this_moderator_p,
Q = model$QE,
Qp = model$QEp)
}
null_model<- get_MA_params(FALSE, ma_data)
#null_model
age_model <- get_MA_params(TRUE, ma_data)
all_models <- bind_rows(null_model,age_model)
mod_print <- all_models %>%
mutate(esimate_print = round(estimate, 2),
CI_print = paste0(" [",
round(estimate.cil, 2),
", ",
round(estimate.cih, 2),
"]"),
estimate_print_full = paste(esimate_print, CI_print),
z_print = round(z, 2),
p_print = round(p, 2),
p_print = ifelse(p_print <.001, "<.001", paste0("= ", p_print)),
mod_estimate_print = round(mod_estimate, 2),
mod_CI_print = paste0(" [",
round(mod_estimate.cil, 2),
", ",
round(mod_estimate.cih, 2),
"]"),
mod_estimate_print_full = paste(mod_estimate_print, mod_CI_print),
mod_z_print = round(moderator_z, 2),
mod_p_print = round(moderator_p, 2),
mod_p_print = ifelse(mod_p_print < .001, "<.001",
paste0("= ", mod_p_print)),
Q_print = round(Q, 2),
Qp_print = round(Qp, 2),
Qp_print = ifelse(Qp_print < .001, "<.001", paste0("= ", Qp_print)))
alpha = 0.05
individual_data <- ma_data %>%
select(short_cite, unique_id,d_calc,d_var_calc, n_1, plot_label,sentence_structure) %>%
mutate(cil = d_calc - (qnorm(alpha / 2, lower.tail = FALSE) * sqrt(d_var_calc)),
cil = case_when(
(cil < -8) ~ -8, # truncate error bar for visibility reason
TRUE ~ cil
),
ciu = d_calc +
qnorm(alpha / 2, lower.tail = FALSE) * sqrt(d_var_calc),
meta = "no",
label_color = "black",
print_full = paste(round(d_calc,2), " [",round(cil,2),", ",round(ciu,2), "]", sep = "")
)
cumulative <- mod_print %>%
filter(this_moderator == FALSE) %>%
select (estimate, estimate.cil, estimate.cih) %>%
mutate(short_cite = "Meta-Analytic Effect Size",
plot_label = "Meta-Analytic Effect Size",
d_calc = estimate,
d_var_calc = NA,
n_1 = 99,
expt_num = "",
expt_condition = "",
cil = estimate.cil,
ciu = estimate.cih,
sentence_structure = "cumulative",
print_full = paste(round(d_calc,2), " [",round(cil,2),", ",round(ciu,2), "]", sep = ""),
meta = "yes",
label_color = "red")
forest_data <- bind_rows(individual_data, cumulative)
forest_data$sentence_structure <- as.factor(forest_data$sentence_structure)
forest_data$meta <- as.factor(forest_data$meta)
forest_data <- forest_data %>%
rowid_to_column() %>%
mutate(
rowid = if_else(rowid == 0, 99, as.double(rowid)) #to always put the MA ES at bottom
) %>%
group_by(sentence_structure) %>% arrange(-rowid, .by_group = TRUE)
forest_data$plot_label <- factor(forest_data$plot_label, levels = forest_data$plot_label)
# set the neighbourhood levels in the order the occur in the data frame
label_colors <- forest_data$label_color[order(forest_data$plot_label)]
forest_data %>% # First sort by val. This sort the dataframe but NOT the factor levels
ggplot(aes(x = plot_label, y = d_calc)) +
geom_point(data = forest_data,
aes(size=n_1, shape = sentence_structure, color = sentence_structure)) +
scale_color_manual(breaks = c("cumulative", "intransitive","transitive"),
values = c("red", "black", "black"))+
scale_size(guide = 'none') +
scale_shape_manual(breaks = c("cumulative", "intransitive","transitive"),
values=c(18,16, 17)) +
#guides(color = guide_legend(override.aes = list(shape = 18, shape = 16, shape = 17))) +
geom_linerange(aes(ymin = cil, ymax = ciu, color = sentence_structure), show.legend = FALSE) +
geom_segment(aes(x = plot_label, y = d_calc, xend = plot_label, yend = cil),
linejoin = "round",
lineend = "round",
size = 0.1,
arrow = arrow(length = unit(0.1, "inches")),
data = filter(forest_data,cil == -8))+
geom_hline(aes(yintercept = 0), color = "gray44",linetype = 2) +
geom_hline(aes(yintercept = filter(forest_data, sentence_structure == "cumulative")$d_calc),
color = "red", linetype = 2) +
geom_text(aes(label = print_full, x = plot_label, y = 7),
size = 3.5, colour = label_colors) +
scale_y_continuous(breaks = seq(-10, 5, 1))+
coord_cartesian(clip = 'on') +
coord_flip() +
ylab("Cohen's d") +
labs(color = "Effect Size Type",shape = "Effect Size Type") + # merge two legends
theme(text = element_text(size=18),
legend.position="bottom",
plot.margin = unit(c(1,2,16,1), "lines"),
legend.title = element_blank(),
panel.background = element_blank(),
#panel.background = element_rect(fill = "white", colour = "grey50"),
axis.title.y = element_blank(),
axis.text.y = element_text(colour = label_colors))
Mixed effect model:
base_model_mv <- rma.mv(d_calc, d_var_calc,
random = ~ 1 | short_cite/same_infant/row_id, data=ma_data)
base_model_mv
##
## Multivariate Meta-Analysis Model (k = 53; method: REML)
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2.1 0.0258 0.1607 15 no short_cite
## sigma^2.2 0.1372 0.3704 52 no short_cite/same_infant
## sigma^2.3 0.0000 0.0000 53 no short_cite/same_infant/row_id
##
## Test for Heterogeneity:
## Q(df = 52) = 140.3678, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## 0.3218 0.0847 3.7970 0.0001 0.1557 0.4879 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
funnel(base_model_mv)
Significantly asymmetrical, by Egger’s test.
rma.mv(d_calc ~ sqrt(d_var_calc), d_var_calc,
random = ~ 1 | short_cite/same_infant/row_id, data=ma_data)
##
## Multivariate Meta-Analysis Model (k = 53; method: REML)
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2.1 0.1106 0.3326 15 no short_cite
## sigma^2.2 0.0493 0.2221 52 no short_cite/same_infant
## sigma^2.3 0.0000 0.0000 53 no short_cite/same_infant/row_id
##
## Test for Residual Heterogeneity:
## QE(df = 51) = 121.9607, p-val < .0001
##
## Test of Moderators (coefficient 2):
## QM(df = 1) = 20.9239, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## intrcpt -0.9267 0.2874 -3.2246 0.0013 -1.4900 -0.3634 **
## sqrt(d_var_calc) 4.2628 0.9319 4.5743 <.0001 2.4363 6.0893 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
moderator_model_mv <- rma.mv(d_calc ~ mean_age + transitive_event_type2 + agent_argument_type2 +
test_mass_or_distributed + test_method + n_repetitions_video, random = ~ 1 | short_cite/same_infant/row_id, d_var_calc, data = ma_data)
funnel(moderator_model_mv)
When including theoretically relevant moderators, it is still shows significant asymmetry.
rma.mv(d_calc ~ sqrt(d_var_calc) + mean_age + transitive_event_type2 + agent_argument_type2 +
test_mass_or_distributed + test_method + n_repetitions_video, random = ~ 1 | short_cite/same_infant/row_id, d_var_calc, data = ma_data)
##
## Multivariate Meta-Analysis Model (k = 53; method: REML)
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2.1 0.0879 0.2964 15 no short_cite
## sigma^2.2 0.0562 0.2370 52 no short_cite/same_infant
## sigma^2.3 0.0000 0.0000 53 no short_cite/same_infant/row_id
##
## Test for Residual Heterogeneity:
## QE(df = 46) = 98.0302, p-val < .0001
##
## Test of Moderators (coefficients 2:7):
## QM(df = 6) = 26.0632, p-val = 0.0002
##
## Model Results:
##
## estimate se zval pval
## intrcpt -1.7319 0.5528 -3.1328 0.0017
## sqrt(d_var_calc) 4.9317 1.0982 4.4909 <.0001
## mean_age 0.0001 0.0003 0.5047 0.6138
## transitive_event_type2indirect_caused_action -0.0583 0.1698 -0.3433 0.7314
## agent_argument_type2pronoun 0.7371 0.4363 1.6896 0.0911
## test_mass_or_distributedmass -0.6527 0.4617 -1.4138 0.1574
## n_repetitions_video 0.1243 0.0591 2.1039 0.0354
## ci.lb ci.ub
## intrcpt -2.8154 -0.6484 **
## sqrt(d_var_calc) 2.7794 7.0840 ***
## mean_age -0.0004 0.0007
## transitive_event_type2indirect_caused_action -0.3911 0.2745
## agent_argument_type2pronoun -0.1180 1.5922 .
## test_mass_or_distributedmass -1.5576 0.2521
## n_repetitions_video 0.0085 0.2400 *
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Checking the assumption that publication bias favors positive results only. This plot provides some evidence that studies favor “significant” results regardless of the sign. Is this enough to violate the model assumption?
pval_plot(ma_data$d_calc,
ma_data$d_var_calc, alpha.select = 0.05)
get_corrected_ma <- function(args, df){
args <- unlist(args)
cluster_val = df$unique_id
if(args[2] == "independent"){
cluster_val = 1:nrow(df)
} else {
cluster_val = df$unique_id
}
corrected_meta(
df$d_calc,
df$d_var_calc,
eta = as.numeric(args[1]),
clustervar = cluster_val,
model = "robust",
selection.tails = as.numeric(args[3]),
favor.positive = TRUE,
small = TRUE) %>%
mutate(eta = as.numeric(args[1]),
clustered = args[2],
selection_tails = as.numeric(args[3]))
}
eta_estimates <- list(c(200, 150, 100, 50, 40, 30, 20, rev(seq(1,15,1))),
c("independent", "clustered"),
c(1,2)) %>%
cross() %>%
map_df(get_corrected_ma, ma_data) %>%
mutate(selection_tails = fct_recode(as.factor(selection_tails),
"one_sided" = "1",
"two_sided" = "2"))
ggplot(eta_estimates, aes(x = eta, y = est)) +
geom_ribbon(aes(ymin = lo, ymax = hi), fill = "gray" ) +
geom_line(aes(color = clustered), size = 1.2) +
geom_hline(aes(yintercept = 0), linetype = 2) +
facet_grid(clustered ~ selection_tails) +
xlab(bquote(eta)) +
ylab(bquote(hat(mu)[eta])) +
theme_classic() +
theme(legend.position = "none")
Not sure why clustered doesn’t differ more from non-clustered?
m = significance_funnel(
ma_data$d_calc,
ma_data$d_var_calc,
xmin = min(ma_data$d_calc),
xmax = max(ma_data$d_calc),
ymin = 0,
ymax = max(sqrt(ma_data$d_var_calc)),
xlab = "Point estimate",
ylab = "Estimated standard error",
favor.positive = TRUE,
est.all = NA,
est.N = NA,
alpha.select = 0.05,
plot.pooled = TRUE
)
m
Calculate worst case point estimate:
(non-affirmative studies only; corresponds to grey diamond in plot above)
ma_data_with_affirm <- ma_data %>%
mutate(pvalue = 2 * ( 1 - pnorm( abs(d_calc / sqrt(d_var_calc)))),
affirm = (d_calc > 0) & (pvalue < 0.05))
rma.mv(d_calc, d_var_calc,
random = ~ 1 | short_cite/same_infant/row_id, data=
ma_data_with_affirm %>% filter(affirm == FALSE))
##
## Multivariate Meta-Analysis Model (k = 36; method: REML)
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2.1 0.0096 0.0981 13 no short_cite
## sigma^2.2 0.0397 0.1993 35 no short_cite/same_infant
## sigma^2.3 0.0000 0.0000 36 no short_cite/same_infant/row_id
##
## Test for Heterogeneity:
## Q(df = 35) = 58.7096, p-val = 0.0073
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## 0.0954 0.0682 1.3979 0.1621 -0.0383 0.2291
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Estimate S val (critical value of eta; publication bias) for varying levels of q. This plot shows that no value of eta could attenuate the point estimate to 0.
get_s_value <- function(i, df){
svalue(df$d_calc,
df$d_var_calc,
q = i,
clustervar = df$unique_id,
model = "robust",
alpha.select = 0.05,
eta.grid.hi = 200,
favor.positive = TRUE,
CI.level = 0.95,
small = TRUE) %>%
mutate(q = i,
sval.est = as.numeric(as.character(sval.est)),
sval.ci = as.numeric(as.character(sval.ci))) # necessary for binding rows
}
s_values <- seq(0, 0.3, by = 0.02) %>%
map_df(get_s_value, ma_data)
ggplot(s_values, aes(x = q, y = sval.est)) +
geom_point() +
geom_line() +
theme_classic()
kable(s_values)
| sval.est | sval.ci | k.affirmative | k.nonaffirmative | signs.recoded | q |
|---|---|---|---|---|---|
| NA | 12.456759 | 17 | 36 | FALSE | 0.00 |
| NA | 6.216525 | 17 | 36 | FALSE | 0.02 |
| NA | 3.955858 | 17 | 36 | FALSE | 0.04 |
| NA | 2.800038 | 17 | 36 | FALSE | 0.06 |
| NA | 2.106813 | 17 | 36 | FALSE | 0.08 |
| NA | 1.650319 | 17 | 36 | FALSE | 0.10 |
| 23.292034 | 1.330041 | 17 | 36 | FALSE | 0.12 |
| 9.282499 | 1.094366 | 17 | 36 | FALSE | 0.14 |
| 5.682525 | 1.000066 | 17 | 36 | FALSE | 0.16 |
| 4.032895 | 1.000066 | 17 | 36 | FALSE | 0.18 |
| 3.086515 | 1.000066 | 17 | 36 | FALSE | 0.20 |
| 2.472652 | 1.000066 | 17 | 36 | FALSE | 0.22 |
| 2.042251 | 1.000066 | 17 | 36 | FALSE | 0.24 |
| 1.723791 | 1.000066 | 17 | 36 | FALSE | 0.26 |
| 1.478601 | 1.000066 | 17 | 36 | FALSE | 0.28 |
| 1.283998 | 1.000066 | 17 | 36 | FALSE | 0.30 |