DATA_PATH <- here("data/processed/syntactic_bootstrapping_tidy_data.csv") 
ma_data <- read.csv(DATA_PATH) %>%  mutate(row_id = 1:n())

Forest Plot

There are 53 effect sizes from 15 unique papers.

get_MA_params <- function(moderator, df) {
  
  this_data <- df
  n = nrow(this_data)
  
  if (moderator == TRUE){
      model <- rma.mv(d_calc ~ log(mean_age), V = d_var_calc,
                      random = ~ 1 | short_cite/same_infant/x_1,
                      method = "REML",
                      data = this_data)
      
      this_moderator_estimate <- model$b[2]
      this_moderator_estimate.cil <- model$ci.lb[2]
      this_moderator_estimate.cih <- model$ci.ub[2]
      this_moderator_z <- model$zval[2]
      this_moderator_p <- model$pval[2]

  } else{
      model <- rma.mv(d_calc, V = d_var_calc,
                      random = ~ 1 | short_cite/same_infant/x_1,
                      method = "REML",
                      data = this_data)
      
      this_moderator_estimate <- NA
      this_moderator_estimate.cil <- NA
      this_moderator_estimate.cih <- NA
      this_moderator_z <- NA
      this_moderator_p <- NA
    
  }
  
   params <- data.frame(this_moderator = moderator,
                       n = n,
                       estimate = model$b[1],
                       estimate.cil = model$ci.lb[1],
                       estimate.cih = model$ci.ub[1],
                       z = model$zval[1],
                       p = model$pval[1],
                       mod_estimate = this_moderator_estimate,
                       mod_estimate.cil = this_moderator_estimate.cil, 
                       mod_estimate.cih = this_moderator_estimate.cih,
                       moderator_z = this_moderator_z,
                       moderator_p = this_moderator_p,
                       Q = model$QE,
                       Qp = model$QEp)
  
  }

null_model<- get_MA_params(FALSE, ma_data)
#null_model
age_model <- get_MA_params(TRUE, ma_data)
all_models <- bind_rows(null_model,age_model)


mod_print <- all_models %>%
             mutate(esimate_print =  round(estimate, 2),
                    CI_print = paste0(" [", 
                                      round(estimate.cil, 2),
                                     ", ",
                                     round(estimate.cih, 2),
                                     "]"),
                   estimate_print_full = paste(esimate_print, CI_print),
                   z_print = round(z, 2),
                   p_print = round(p, 2),
                   p_print = ifelse(p_print <.001, "<.001", paste0("= ", p_print)),
                   mod_estimate_print = round(mod_estimate, 2),
                   mod_CI_print = paste0(" [", 
                                      round(mod_estimate.cil, 2),
                                     ", ",
                                     round(mod_estimate.cih, 2),
                                     "]"),
                   mod_estimate_print_full = paste(mod_estimate_print, mod_CI_print),

                   mod_z_print =  round(moderator_z, 2),
                   mod_p_print =  round(moderator_p, 2),
                   mod_p_print = ifelse(mod_p_print < .001, "<.001", 
                                        paste0("= ", mod_p_print)),
                   Q_print = round(Q, 2),
                   Qp_print = round(Qp, 2),
                   Qp_print = ifelse(Qp_print < .001, "<.001", paste0("= ", Qp_print)))

alpha = 0.05
individual_data <- ma_data %>% 
  select(short_cite, unique_id,d_calc,d_var_calc, n_1, plot_label,sentence_structure) %>% 
  mutate(cil = d_calc - (qnorm(alpha / 2, lower.tail = FALSE) * sqrt(d_var_calc)),
         cil = case_when(
          (cil < -8) ~ -8,  # truncate error bar for visibility reason 
          TRUE ~ cil
         ),
         ciu = d_calc +
               qnorm(alpha / 2, lower.tail = FALSE) * sqrt(d_var_calc), 
         meta = "no", 
         label_color = "black",
         print_full = paste(round(d_calc,2), " [",round(cil,2),", ",round(ciu,2), "]", sep = "")
         )

cumulative <- mod_print %>% 
  filter(this_moderator == FALSE) %>% 
  select (estimate, estimate.cil, estimate.cih) %>% 
  mutate(short_cite = "Meta-Analytic Effect Size",
         plot_label = "Meta-Analytic Effect Size",
         d_calc = estimate, 
         d_var_calc = NA, 
         n_1 = 99, 
         expt_num = "", 
         expt_condition = "",
         cil = estimate.cil, 
         ciu = estimate.cih, 
         sentence_structure = "cumulative",
        print_full = paste(round(d_calc,2), " [",round(cil,2),", ",round(ciu,2), "]", sep = ""),
         meta = "yes", 
        label_color = "red")

forest_data <- bind_rows(individual_data, cumulative) 
forest_data$sentence_structure <- as.factor(forest_data$sentence_structure)
forest_data$meta <- as.factor(forest_data$meta)
forest_data <- forest_data %>% 
  rowid_to_column() %>% 
  mutate(
    rowid = if_else(rowid == 0, 99, as.double(rowid)) #to always put the MA ES at bottom
  ) %>% 
  group_by(sentence_structure) %>% arrange(-rowid, .by_group = TRUE)
forest_data$plot_label <- factor(forest_data$plot_label, levels = forest_data$plot_label)



# set the neighbourhood levels in the order the occur in the data frame
label_colors <- forest_data$label_color[order(forest_data$plot_label)]

forest_data %>%  # First sort by val. This sort the dataframe but NOT the factor levels
  ggplot(aes(x = plot_label, y = d_calc)) + 
  geom_point(data = forest_data,
             aes(size=n_1, shape = sentence_structure, color = sentence_structure)) + 
  scale_color_manual(breaks = c("cumulative", "intransitive","transitive"),
                     values = c("red", "black", "black"))+ 
  scale_size(guide = 'none') + 
  scale_shape_manual(breaks = c("cumulative", "intransitive","transitive"),
                     values=c(18,16, 17)) +
  #guides(color = guide_legend(override.aes = list(shape = 18, shape = 16, shape = 17))) + 
  geom_linerange(aes(ymin = cil, ymax = ciu, color = sentence_structure), show.legend = FALSE) + 
  geom_segment(aes(x = plot_label, y = d_calc, xend = plot_label, yend = cil),
               linejoin = "round", 
               lineend = "round", 
               size = 0.1,
               arrow = arrow(length = unit(0.1, "inches")),
               data = filter(forest_data,cil == -8))+
  geom_hline(aes(yintercept = 0),  color = "gray44",linetype = 2) + 
  geom_hline(aes(yintercept = filter(forest_data, sentence_structure == "cumulative")$d_calc), 
             color = "red", linetype = 2) + 
  geom_text(aes(label = print_full, x = plot_label, y = 7), 
            size = 3.5, colour = label_colors) + 
  scale_y_continuous(breaks = seq(-10, 5, 1))+ 
  coord_cartesian(clip = 'on') + 
  coord_flip() + 
  ylab("Cohen's d") +
  labs(color  = "Effect Size Type",shape = "Effect Size Type") + # merge two legends 
  theme(text = element_text(size=18),
        legend.position="bottom",   
        plot.margin = unit(c(1,2,16,1), "lines"),
        legend.title = element_blank(),
        panel.background = element_blank(),
       #panel.background = element_rect(fill = "white", colour = "grey50"),
        axis.title.y = element_blank(),
        axis.text.y = element_text(colour = label_colors))

Mixed effect model:

base_model_mv <-  rma.mv(d_calc,  d_var_calc,  
                         random = ~ 1 | short_cite/same_infant/row_id, data=ma_data)

base_model_mv

## 
## Multivariate Meta-Analysis Model (k = 53; method: REML)
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed                         factor 
## sigma^2.1  0.0258  0.1607     15     no                     short_cite 
## sigma^2.2  0.1372  0.3704     52     no         short_cite/same_infant 
## sigma^2.3  0.0000  0.0000     53     no  short_cite/same_infant/row_id 
## 
## Test for Heterogeneity:
## Q(df = 52) = 140.3678, p-val < .0001
## 
## Model Results:
## 
## estimate      se    zval    pval   ci.lb   ci.ub 
##   0.3218  0.0847  3.7970  0.0001  0.1557  0.4879  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Funnel Plot with no moderators

funnel(base_model_mv)

Egger’s Test

Significantly asymmetrical, by Egger’s test.

rma.mv(d_calc ~ sqrt(d_var_calc),  d_var_calc,  
                         random = ~ 1 | short_cite/same_infant/row_id, data=ma_data)

## 
## Multivariate Meta-Analysis Model (k = 53; method: REML)
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed                         factor 
## sigma^2.1  0.1106  0.3326     15     no                     short_cite 
## sigma^2.2  0.0493  0.2221     52     no         short_cite/same_infant 
## sigma^2.3  0.0000  0.0000     53     no  short_cite/same_infant/row_id 
## 
## Test for Residual Heterogeneity:
## QE(df = 51) = 121.9607, p-val < .0001
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 20.9239, p-val < .0001
## 
## Model Results:
## 
##                   estimate      se     zval    pval    ci.lb    ci.ub 
## intrcpt            -0.9267  0.2874  -3.2246  0.0013  -1.4900  -0.3634   ** 
## sqrt(d_var_calc)    4.2628  0.9319   4.5743  <.0001   2.4363   6.0893  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Funnel Plot with key moderators

moderator_model_mv <- rma.mv(d_calc ~    mean_age + transitive_event_type2 + agent_argument_type2 + 
         test_mass_or_distributed + test_method + n_repetitions_video,  random = ~ 1 | short_cite/same_infant/row_id, d_var_calc, data = ma_data) 
funnel(moderator_model_mv)

Egger’s test

When including theoretically relevant moderators, it is still shows significant asymmetry.

rma.mv(d_calc ~   sqrt(d_var_calc) + mean_age + transitive_event_type2 + agent_argument_type2 + 
         test_mass_or_distributed + test_method + n_repetitions_video,  random = ~ 1 | short_cite/same_infant/row_id, d_var_calc, data = ma_data)

## 
## Multivariate Meta-Analysis Model (k = 53; method: REML)
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed                         factor 
## sigma^2.1  0.0879  0.2964     15     no                     short_cite 
## sigma^2.2  0.0562  0.2370     52     no         short_cite/same_infant 
## sigma^2.3  0.0000  0.0000     53     no  short_cite/same_infant/row_id 
## 
## Test for Residual Heterogeneity:
## QE(df = 46) = 98.0302, p-val < .0001
## 
## Test of Moderators (coefficients 2:7):
## QM(df = 6) = 26.0632, p-val = 0.0002
## 
## Model Results:
## 
##                                               estimate      se     zval    pval 
## intrcpt                                        -1.7319  0.5528  -3.1328  0.0017 
## sqrt(d_var_calc)                                4.9317  1.0982   4.4909  <.0001 
## mean_age                                        0.0001  0.0003   0.5047  0.6138 
## transitive_event_type2indirect_caused_action   -0.0583  0.1698  -0.3433  0.7314 
## agent_argument_type2pronoun                     0.7371  0.4363   1.6896  0.0911 
## test_mass_or_distributedmass                   -0.6527  0.4617  -1.4138  0.1574 
## n_repetitions_video                             0.1243  0.0591   2.1039  0.0354 
##                                                 ci.lb    ci.ub 
## intrcpt                                       -2.8154  -0.6484   ** 
## sqrt(d_var_calc)                               2.7794   7.0840  *** 
## mean_age                                      -0.0004   0.0007      
## transitive_event_type2indirect_caused_action  -0.3911   0.2745      
## agent_argument_type2pronoun                   -0.1180   1.5922    . 
## test_mass_or_distributedmass                  -1.5576   0.2521      
## n_repetitions_video                            0.0085   0.2400    * 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Sensitivity Analysis (Mathur & VanderWeele, 2020)

P-val plot

Checking the assumption that publication bias favors positive results only. This plot provides some evidence that studies favor “significant” results regardless of the sign. Is this enough to violate the model assumption?

pval_plot(ma_data$d_calc,
ma_data$d_var_calc, alpha.select = 0.05)

Estimate true eta

get_corrected_ma <- function(args, df){
  args <- unlist(args)

    cluster_val = df$unique_id
   if(args[2] == "independent"){
      cluster_val = 1:nrow(df)
   } else {
      cluster_val = df$unique_id
   }
  
  corrected_meta(
      df$d_calc,
      df$d_var_calc,
      eta = as.numeric(args[1]),
      clustervar = cluster_val,
      model = "robust",
      selection.tails = as.numeric(args[3]), 
      favor.positive = TRUE,
      small = TRUE) %>%
    mutate(eta = as.numeric(args[1]),
           clustered = args[2],
           selection_tails = as.numeric(args[3]))
}

eta_estimates <- list(c(200, 150, 100, 50, 40, 30, 20, rev(seq(1,15,1))),
     c("independent", "clustered"),
     c(1,2)) %>%
  cross() %>%
  map_df(get_corrected_ma, ma_data) %>%
  mutate(selection_tails = fct_recode(as.factor(selection_tails),
                                      "one_sided" = "1",
                                      "two_sided" = "2"))

ggplot(eta_estimates, aes(x = eta, y = est)) +
    geom_ribbon(aes(ymin = lo, ymax = hi), fill = "gray" ) +
    geom_line(aes(color = clustered), size = 1.2) +
    geom_hline(aes(yintercept = 0), linetype = 2) +
    facet_grid(clustered ~ selection_tails) +
    xlab(bquote(eta)) +
    ylab(bquote(hat(mu)[eta])) +
    theme_classic() +
    theme(legend.position = "none")

Not sure why clustered doesn’t differ more from non-clustered?

Eta estimate under worst case publication bias scenario

m = significance_funnel(
  ma_data$d_calc,
  ma_data$d_var_calc,
  xmin = min(ma_data$d_calc),
  xmax = max(ma_data$d_calc),
  ymin = 0,
  ymax = max(sqrt(ma_data$d_var_calc)),
  xlab = "Point estimate",
  ylab = "Estimated standard error",
  favor.positive = TRUE,
  est.all = NA,
  est.N = NA,
  alpha.select = 0.05,
  plot.pooled = TRUE
)

Calculate worst case point estimate:

(non-affirmative studies only; corresponds to grey diamond in plot above)

ma_data_with_affirm <- ma_data %>%
  mutate(pvalue =  2 * ( 1 - pnorm( abs(d_calc / sqrt(d_var_calc)))),
         affirm =  (d_calc > 0) & (pvalue < 0.05))

 rma.mv(d_calc,  d_var_calc,  
                         random = ~ 1 | short_cite/same_infant/row_id, data=
          ma_data_with_affirm %>% filter(affirm == FALSE))

## 
## Multivariate Meta-Analysis Model (k = 36; method: REML)
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed                         factor 
## sigma^2.1  0.0096  0.0981     13     no                     short_cite 
## sigma^2.2  0.0397  0.1993     35     no         short_cite/same_infant 
## sigma^2.3  0.0000  0.0000     36     no  short_cite/same_infant/row_id 
## 
## Test for Heterogeneity:
## Q(df = 35) = 58.7096, p-val = 0.0073
## 
## Model Results:
## 
## estimate      se    zval    pval    ci.lb   ci.ub 
##   0.0954  0.0682  1.3979  0.1621  -0.0383  0.2291    
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Estimate S val (critical value of eta; publication bias) for varying levels of q. This plot shows that no value of eta could attenuate the point estimate to 0.

get_s_value <- function(i, df){

 svalue(df$d_calc,
        df$d_var_calc,
        q = i,
        clustervar = df$unique_id,
        model = "robust",
        alpha.select = 0.05,
        eta.grid.hi = 200,
        favor.positive = TRUE,
        CI.level = 0.95,
        small = TRUE) %>%
    mutate(q = i,
           sval.est = as.numeric(as.character(sval.est)),
           sval.ci = as.numeric(as.character(sval.ci)))  # necessary for binding rows
}

s_values <- seq(0, 0.3, by = 0.02) %>%
  map_df(get_s_value, ma_data) 

ggplot(s_values, aes(x = q, y = sval.est)) +
  geom_point() +
  geom_line() +
  theme_classic()

kable(s_values)

sval.est	sval.ci	k.affirmative	k.nonaffirmative	signs.recoded	q
NA	12.456759	17	36	FALSE	0.00
NA	6.216525	17	36	FALSE	0.02
NA	3.955858	17	36	FALSE	0.04
NA	2.800038	17	36	FALSE	0.06
NA	2.106813	17	36	FALSE	0.08
NA	1.650319	17	36	FALSE	0.10
23.292034	1.330041	17	36	FALSE	0.12
9.282499	1.094366	17	36	FALSE	0.14
5.682525	1.000066	17	36	FALSE	0.16
4.032895	1.000066	17	36	FALSE	0.18
3.086515	1.000066	17	36	FALSE	0.20
2.472652	1.000066	17	36	FALSE	0.22
2.042251	1.000066	17	36	FALSE	0.24
1.723791	1.000066	17	36	FALSE	0.26
1.478601	1.000066	17	36	FALSE	0.28
1.283998	1.000066	17	36	FALSE	0.30

Syntactic Bootstrapping Publication Bias

Anjie Cao and Molly Lewis

8/24/2020