DATA_PATH <- here("data/processed/syntactic_bootstrapping_tidy_data_molly.csv") 

ma_data <- read_csv(DATA_PATH) %>%
  mutate(row_id = 1:n()) %>%
  mutate(agent_argument_type2 = case_when(str_detect(agent_argument_type, "pronoun") ~ "pronoun",
                                          TRUE ~ "noun"),
         transitive_event_type2 = case_when(transitive_event_type == "direct_caused_action" ~ "direct_caused_action",
                                            TRUE ~ "indirect_caused_action"))

Funnel plot/Egger’s test

Funnel plot of multi-level model with no moderators:

base_model_mv <-  rma.mv(d_calc,  d_var_calc,  
                         random = ~ 1 | short_cite/same_infant/row_id, data=ma_data)

funnel(base_model_mv)

Visually, it looks like there may be some asymmetry. We can formally test for asymmetry with Egger’s test. Eggers test is a regression predicting d_calc with some measure of effect size variance, often standard error. Here’s what it looks like in the non-multi-level model:

base_model_no_mv <-  rma(d_calc,  d_var_calc, data=ma_data)
regtest(base_model_no_mv, predictor = 'sei')
## 
## Regression Test for Funnel Plot Asymmetry
## 
## model:     mixed-effects meta-regression model
## predictor: standard error
## 
## test for funnel plot asymmetry: z = 3.8577, p = 0.0001
regtest(base_model_no_mv, predictor = 'sei', ret.fit = T)
## 
## Regression Test for Funnel Plot Asymmetry
## 
## model:     mixed-effects meta-regression model
## predictor: standard error
## 
## Mixed-Effects Model (k = 53; tau^2 estimator: REML)
## 
## tau^2 (estimated amount of residual heterogeneity):     0.1161 (SE = 0.0416)
## tau (square root of estimated tau^2 value):             0.3407
## I^2 (residual heterogeneity / unaccounted variability): 57.36%
## H^2 (unaccounted variability / sampling variability):   2.35
## R^2 (amount of heterogeneity accounted for):            23.89%
## 
## Test for Residual Heterogeneity:
## QE(df = 51) = 120.5874, p-val < .0001
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 14.8815, p-val = 0.0001
## 
## Model Results:
## 
##          estimate      se     zval    pval    ci.lb    ci.ub 
## intrcpt   -0.5567  0.2395  -2.3243  0.0201  -1.0261  -0.0873    * 
## sei        2.9169  0.7561   3.8577  0.0001   1.4349   4.3989  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## test for funnel plot asymmetry: z = 3.8577, p = 0.0001

This is equivalent to the following:

rma(d_calc ~ sqrt(d_var_calc),  d_var_calc, data = ma_data)
## 
## Mixed-Effects Model (k = 53; tau^2 estimator: REML)
## 
## tau^2 (estimated amount of residual heterogeneity):     0.1161 (SE = 0.0416)
## tau (square root of estimated tau^2 value):             0.3407
## I^2 (residual heterogeneity / unaccounted variability): 57.36%
## H^2 (unaccounted variability / sampling variability):   2.35
## R^2 (amount of heterogeneity accounted for):            23.89%
## 
## Test for Residual Heterogeneity:
## QE(df = 51) = 120.5874, p-val < .0001
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 14.8815, p-val = 0.0001
## 
## Model Results:
## 
##                   estimate      se     zval    pval    ci.lb    ci.ub 
## intrcpt            -0.5567  0.2395  -2.3243  0.0201  -1.0261  -0.0873    * 
## sqrt(d_var_calc)    2.9169  0.7561   3.8577  0.0001   1.4349   4.3989  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

We can visualize the relationship by plotting d_calc vs. sei

ggplot(ma_data, aes(x = sqrt(d_var_calc), y = d_calc)) +
  geom_point() +
  geom_smooth(method = "lm")

… We see a strong linear relationship between effect size variance and the size of the effect: Bigger effect sizes have bigger variance. There should be no relationship in the absence of publication bias.

Now, let’s do Egger’s test with the multi-level model:

rma.mv(d_calc ~ sqrt(d_var_calc),  d_var_calc,  
                         random = ~ 1 | short_cite/same_infant/row_id, data=ma_data)
## 
## Multivariate Meta-Analysis Model (k = 53; method: REML)
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed                         factor 
## sigma^2.1  0.1126  0.3356     15     no                     short_cite 
## sigma^2.2  0.0469  0.2166     52     no         short_cite/same_infant 
## sigma^2.3  0.0000  0.0000     53     no  short_cite/same_infant/row_id 
## 
## Test for Residual Heterogeneity:
## QE(df = 51) = 120.5874, p-val < .0001
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 21.5781, p-val < .0001
## 
## Model Results:
## 
##                   estimate      se     zval    pval    ci.lb    ci.ub 
## intrcpt            -0.9390  0.2870  -3.2720  0.0011  -1.5014  -0.3765   ** 
## sqrt(d_var_calc)    4.3260  0.9313   4.6452  <.0001   2.5007   6.1512  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Strong evidence for publication bias. There’s still evidence even when you control for a number of potential moderators:

rma.mv(d_calc ~   sqrt(d_var_calc) + mean_age + transitive_event_type2 + agent_argument_type2 + 
         test_mass_or_distributed + test_method + n_repetitions_video,  random = ~ 1 | short_cite/same_infant/row_id, d_var_calc, data = ma_data)
## 
## Multivariate Meta-Analysis Model (k = 53; method: REML)
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed                         factor 
## sigma^2.1  0.0748  0.2736     15     no                     short_cite 
## sigma^2.2  0.0505  0.2247     52     no         short_cite/same_infant 
## sigma^2.3  0.0000  0.0000     53     no  short_cite/same_infant/row_id 
## 
## Test for Residual Heterogeneity:
## QE(df = 46) = 93.8800, p-val < .0001
## 
## Test of Moderators (coefficients 2:7):
## QM(df = 6) = 28.6021, p-val < .0001
## 
## Model Results:
## 
##                                               estimate      se     zval    pval 
## intrcpt                                        -1.8191  0.5397  -3.3709  0.0007 
## sqrt(d_var_calc)                                5.0887  1.0835   4.6964  <.0001 
## mean_age                                        0.0002  0.0003   0.6610  0.5086 
## transitive_event_type2indirect_caused_action   -0.2038  0.1723  -1.1830  0.2368 
## agent_argument_type2pronoun                     0.6719  0.4013   1.6742  0.0941 
## test_mass_or_distributedmass                   -0.5727  0.4141  -1.3832  0.1666 
## n_repetitions_video                             0.1350  0.0562   2.4036  0.0162 
##                                                 ci.lb    ci.ub 
## intrcpt                                       -2.8768  -0.7614  *** 
## sqrt(d_var_calc)                               2.9650   7.2124  *** 
## mean_age                                      -0.0004   0.0008      
## transitive_event_type2indirect_caused_action  -0.5415   0.1339      
## agent_argument_type2pronoun                   -0.1147   1.4585    . 
## test_mass_or_distributedmass                  -1.3843   0.2388      
## n_repetitions_video                            0.0249   0.2452    * 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Trim-fill

trim_fill_base <- trimfill(base_model_no_mv)
trim_fill_base
## 
## Estimated number of missing studies on the left side: 11 (SE = 4.8066)
## 
## Random-Effects Model (k = 64; tau^2 estimator: REML)
## 
## tau^2 (estimated amount of total heterogeneity): 0.2891 (SE = 0.0720)
## tau (square root of estimated tau^2 value):      0.5377
## I^2 (total heterogeneity / total variability):   75.20%
## H^2 (total variability / sampling variability):  4.03
## 
## Test for Heterogeneity:
## Q(df = 63) = 210.6251, p-val < .0001
## 
## Model Results:
## 
## estimate      se    zval    pval   ci.lb   ci.ub 
##   0.1871  0.0800  2.3393  0.0193  0.0303  0.3438  * 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
funnel(trim_fill_base, legend=TRUE)