library(papaja)
library(rmarkdown)
library(tidyverse) 
library(here)
library(glue)
library(metafor)
library(knitr)
library(gridExtra)
library(here)
library(heatmaply)
library(MuMIn)
library(glmulti)
library(PRISMAstatement)
library(PublicationBias)

DATA_PATH <- here("data/processed/verbextension_tidy_data.csv") 

ma_data <- read_csv(DATA_PATH) %>% mutate(row_id = 1:n())
ma_data_exclude_outlier <- read_csv(DATA_PATH) %>% mutate(row_id = 1:n()) %>% filter(plot_label != "Abbot-Smith, Imai, Durrant,& Nurmsoo (2017) - 2h") #%>% filter(plot_label != "Abbot-Smith, Imai, Durrant,& Nurmsoo (2017) - 2f")

All Data

Forest Plot

get_MA_params <- function(moderator, df) {
  
  this_data <- df
  n = nrow(this_data)
  
  if (moderator == TRUE){
      model <- rma.mv(d_calc ~ log(mean_age), V = d_var_calc,
                      random = ~ 1 | short_cite/same_infant/x_1,
                      method = "REML",
                      data = this_data)
      
      this_moderator_estimate <- model$b[2]
      this_moderator_estimate.cil <- model$ci.lb[2]
      this_moderator_estimate.cih <- model$ci.ub[2]
      this_moderator_z <- model$zval[2]
      this_moderator_p <- model$pval[2]

  } else{
      model <- rma.mv(d_calc, V = d_var_calc,
                      random = ~ 1 | short_cite/same_infant/x_1,
                      method = "REML",
                      data = this_data)
      
      this_moderator_estimate <- NA
      this_moderator_estimate.cil <- NA
      this_moderator_estimate.cih <- NA
      this_moderator_z <- NA
      this_moderator_p <- NA
    
  }
  
   params <- data.frame(this_moderator = moderator,
                       n = n,
                       estimate = model$b[1],
                       estimate.cil = model$ci.lb[1],
                       estimate.cih = model$ci.ub[1],
                       z = model$zval[1],
                       p = model$pval[1],
                       mod_estimate = this_moderator_estimate,
                       mod_estimate.cil = this_moderator_estimate.cil, 
                       mod_estimate.cih = this_moderator_estimate.cih,
                       moderator_z = this_moderator_z,
                       moderator_p = this_moderator_p,
                       Q = model$QE,
                       Qp = model$QEp)
  
  }

null_model<- get_MA_params(FALSE, ma_data)
#null_model
age_model <- get_MA_params(TRUE, ma_data)
all_models <- bind_rows(null_model,age_model)


mod_print <- all_models %>%
             mutate(esimate_print =  round(estimate, 2),
                    CI_print = paste0(" [", 
                                      round(estimate.cil, 2),
                                     ", ",
                                     round(estimate.cih, 2),
                                     "]"),
                   estimate_print_full = paste(esimate_print, CI_print),
                   z_print = round(z, 2),
                   p_print = round(p, 2),
                   p_print = ifelse(p_print <.001, "<.001", paste0("= ", p_print)),
                   mod_estimate_print = round(mod_estimate, 2),
                   mod_CI_print = paste0(" [", 
                                      round(mod_estimate.cil, 2),
                                     ", ",
                                     round(mod_estimate.cih, 2),
                                     "]"),
                   mod_estimate_print_full = paste(mod_estimate_print, mod_CI_print),

                   mod_z_print =  round(moderator_z, 2),
                   mod_p_print =  round(moderator_p, 2),
                   mod_p_print = ifelse(mod_p_print < .001, "<.001", 
                                        paste0("= ", mod_p_print)),
                   Q_print = round(Q, 2),
                   Qp_print = round(Qp, 2),
                   Qp_print = ifelse(Qp_print < .001, "<.001", paste0("= ", Qp_print)))

alpha = 0.05
individual_data <- ma_data %>% 
  select(short_cite, unique_id,d_calc,d_var_calc, n_1, plot_label,sentence_structure) %>% 
  mutate(cil = d_calc - (qnorm(alpha / 2, lower.tail = FALSE) * sqrt(d_var_calc)),
         cil = case_when(
          (cil < -8) ~ -8,  # truncate error bar for visibility reason 
          TRUE ~ cil
         ),
         ciu = d_calc +
               qnorm(alpha / 2, lower.tail = FALSE) * sqrt(d_var_calc), 
         meta = "no", 
         label_color = "black",
         print_full = paste(round(d_calc,2), " [",round(cil,2),", ",round(ciu,2), "]", sep = "")
         )

cumulative <- mod_print %>% 
  filter(this_moderator == FALSE) %>% 
  select (estimate, estimate.cil, estimate.cih) %>% 
  mutate(short_cite = "Meta-Analytic Effect Size",
         plot_label = "Meta-Analytic Effect Size",
         d_calc = estimate, 
         d_var_calc = NA, 
         n_1 = 99, 
         expt_num = "", 
         expt_condition = "",
         cil = estimate.cil, 
         ciu = estimate.cih, 
         sentence_structure = "cumulative",
        print_full = paste(round(d_calc,2), " [",round(cil,2),", ",round(ciu,2), "]", sep = ""),
         meta = "yes", 
        label_color = "red")

forest_data <- bind_rows(individual_data, cumulative) 
forest_data$sentence_structure <- as.factor(forest_data$sentence_structure)
forest_data$meta <- as.factor(forest_data$meta)
forest_data <- forest_data %>% 
  rowid_to_column() %>% 
  mutate(
    rowid = if_else(rowid == 0, 99, as.double(rowid)) #to always put the MA ES at bottom
  ) %>% 
  group_by(sentence_structure) %>% arrange(-rowid, .by_group = TRUE)
forest_data$plot_label <- factor(forest_data$plot_label, levels = forest_data$plot_label)



# set the neighbourhood levels in the order the occur in the data frame
label_colors <- forest_data$label_color[order(forest_data$plot_label)]

forest_data %>%  # First sort by val. This sort the dataframe but NOT the factor levels
  ggplot(aes(x = plot_label, y = d_calc)) + 
  geom_point(data = forest_data,
             aes(size=n_1, shape = sentence_structure, color = sentence_structure)) + 
  scale_color_manual(breaks = c("cumulative", "intransitive","transitive"),
                     values = c("red", "black", "black"))+ 
  scale_size(guide = 'none') + 
  scale_shape_manual(breaks = c("cumulative", "intransitive","transitive"),
                     values=c(18,16, 17)) +
  #guides(color = guide_legend(override.aes = list(shape = 18, shape = 16, shape = 17))) + 
  geom_linerange(aes(ymin = cil, ymax = ciu, color = sentence_structure), show.legend = FALSE) + 
  geom_segment(aes(x = plot_label, y = d_calc, xend = plot_label, yend = cil),
               linejoin = "round", 
               lineend = "round", 
               size = 0.1,
               arrow = arrow(length = unit(0.1, "inches")),
               data = filter(forest_data,cil == -8))+
  geom_hline(aes(yintercept = 0),  color = "gray44",linetype = 2) + 
  geom_hline(aes(yintercept = filter(forest_data, sentence_structure == "cumulative")$d_calc), 
             color = "red", linetype = 2) + 
  geom_text(aes(label = print_full, x = plot_label, y = 7), 
            size = 3.5, colour = label_colors) + 
  scale_y_continuous(breaks = seq(-10, 5, 1))+ 
  coord_cartesian(clip = 'on') + 
  coord_flip() + 
  ylab("Cohen's d") +
  labs(color  = "Effect Size Type",shape = "Effect Size Type") + # merge two legends 
  theme(text = element_text(size=18),
        legend.position="bottom",   
        plot.margin = unit(c(1,2,16,1), "lines"),
        legend.title = element_blank(),
        panel.background = element_blank(),
       #panel.background = element_rect(fill = "white", colour = "grey50"),
        axis.title.y = element_blank(),
        axis.text.y = element_text(colour = label_colors))

Regression Test

Funnel Plot

base_model_no_mv <-  rma(d_calc,  d_var_calc, data=ma_data)
funnel(base_model_no_mv)

Base Model Regression Test

base_model_no_mv <-  rma(d_calc,  d_var_calc, data=ma_data)
regtest(base_model_no_mv, predictor = 'sei', ret.fit = T)

## 
## Regression Test for Funnel Plot Asymmetry
## 
## model:     mixed-effects meta-regression model
## predictor: standard error
## 
## Mixed-Effects Model (k = 42; tau^2 estimator: REML)
## 
## tau^2 (estimated amount of residual heterogeneity):     0.8721 (SE = 0.2157)
## tau (square root of estimated tau^2 value):             0.9339
## I^2 (residual heterogeneity / unaccounted variability): 92.30%
## H^2 (unaccounted variability / sampling variability):   12.99
## R^2 (amount of heterogeneity accounted for):            0.00%
## 
## Test for Residual Heterogeneity:
## QE(df = 40) = 286.5319, p-val < .0001
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 12.3476, p-val = 0.0004
## 
## Model Results:
## 
##          estimate      se     zval    pval    ci.lb    ci.ub 
## intrcpt    1.2447  0.3561   3.4951  0.0005   0.5467   1.9427  *** 
## sei       -3.8130  1.0851  -3.5139  0.0004  -5.9398  -1.6862  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## test for funnel plot asymmetry: z = -3.5139, p = 0.0004

Multi-level Model Regression Test

rma.mv(d_calc ~ sqrt(d_var_calc),  d_var_calc,  
                         random = ~ 1 | short_cite/same_infant/row_id, data=ma_data)

## 
## Multivariate Meta-Analysis Model (k = 42; method: REML)
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed                         factor 
## sigma^2.1  0.4323  0.6575     11     no                     short_cite 
## sigma^2.2  0.0000  0.0000     38     no         short_cite/same_infant 
## sigma^2.3  0.3314  0.5756     42     no  short_cite/same_infant/row_id 
## 
## Test for Residual Heterogeneity:
## QE(df = 40) = 286.5319, p-val < .0001
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 21.2423, p-val < .0001
## 
## Model Results:
## 
##                   estimate      se     zval    pval    ci.lb    ci.ub 
## intrcpt             1.4559  0.3585   4.0611  <.0001   0.7532   2.1585  *** 
## sqrt(d_var_calc)   -4.7179  1.0236  -4.6089  <.0001  -6.7242  -2.7116  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

PublicationBias Package

P-val Plot

pval_plot(ma_data$d_calc,
ma_data$d_var_calc, alpha.select = 0.05)

### Eta analysis {.tabset}

eta curve

eta <- data.frame()

for (i in 1:20){
  df <- corrected_meta(
      ma_data$d_calc,
      ma_data$d_var_calc,
      eta= i,
      clustervar = 1:length(ma_data$d_calc),
      model = "robust",
      selection.tails = 1, # if changed to 2, the curve is flat
      favor.positive = TRUE,# if changed to False, the curve is flat
      alpha.select = 0.05,
      CI.level = 0.95,
      small = TRUE)
  eta <- bind_rows(eta, df)
  
}
eta %>% ggplot(aes(x = eta, y = pval)) + 
  geom_point() + 
  geom_line() + 
  geom_hline(yintercept = 0.05, color = "red", type = 2) + 
  labs(title = "eta vs pval")

#### plot

eta.list = as.list( c( 200, 150, 100, 50, 40, 30, 20, rev( seq(1,15,1) ) ) )
res.list = lapply(eta.list, function(x){
cat("\n Working on eta = ", x)
return(corrected_meta(
      ma_data$d_calc,
      ma_data$d_var_calc,
      eta= x,
      clustervar = 1:length(ma_data$d_calc),
      model = "robust",
      selection.tails = 1,
      favor.positive = TRUE,
      alpha.select = 0.05,
      CI.level = 0.95,
      small = TRUE) )
})

## 
##  Working on eta =  200
##  Working on eta =  150
##  Working on eta =  100
##  Working on eta =  50
##  Working on eta =  40
##  Working on eta =  30
##  Working on eta =  20
##  Working on eta =  15
##  Working on eta =  14
##  Working on eta =  13
##  Working on eta =  12
##  Working on eta =  11
##  Working on eta =  10
##  Working on eta =  9
##  Working on eta =  8
##  Working on eta =  7
##  Working on eta =  6
##  Working on eta =  5
##  Working on eta =  4
##  Working on eta =  3
##  Working on eta =  2
##  Working on eta =  1

res.df = as.data.frame(do.call("rbind",res.list)) 

ggplot( data = res.df, aes( x = eta, y = est ) ) +
geom_ribbon( data = res.df, aes( x = eta, ymin = lo, ymax = hi ), fill = "gray" ) +
geom_line( lwd = 1.2 ) +
xlab( bquote( eta ) ) +
ylab( bquote( hat(mu)[eta] ) ) +
theme_classic()

significance_funnel

significance_funnel(
ma_data$d_calc,
ma_data$d_var_calc,
xmin = min(ma_data$d_calc),
xmax = max(ma_data$d_calc),
ymin = 0,
ymax = max(sqrt(ma_data$d_var_calc)),
xlab = "Point estimate",
ylab = "Estimated standard error",
favor.positive = TRUE,
est.all = NA,
est.N = NA,
alpha.select = 0.05,
plot.pooled = TRUE
)

Estimate the S_val

sval <- data.frame()

for (i in seq(0, 0.1, by = 0.02)){
  s <- svalue(
          ma_data$d_calc,
          ma_data$d_var_calc,
          q = i,
          clustervar = 1:length(ma_data$d_calc),
          model = "robust",
          alpha.select = 0.05,
          eta.grid.hi = 200,
          favor.positive = TRUE,
          CI.level = 0.95,
          small = TRUE,
          return.worst.meta = FALSE
          )
  s <- s %>% mutate(q_val = i)

  sval <- bind_rows(sval, s) 
}

sval

##   sval.est sval.ci k.affirmative k.nonaffirmative signs.recoded q_val
## 1 1.528745      --            15               27         FALSE  0.00
## 2 1.416489      --            15               27         FALSE  0.02
## 3 1.315782      --            15               27         FALSE  0.04
## 4 1.224928      --            15               27         FALSE  0.06
## 5 1.142530      --            15               27         FALSE  0.08
## 6 1.067469      --            15               27         FALSE  0.10

null and age model

null <- rma.mv(d_calc, V = d_var_calc,
                      random = ~ 1 | short_cite/same_infant/row_id,
                      method = "REML",
                      data = ma_data)
age <- rma.mv(d_calc ~ mean_age, V = d_var_calc,
                      random = ~ 1 | short_cite/same_infant/row_id,
                      method = "REML",
                      data = ma_data)
age

## 
## Multivariate Meta-Analysis Model (k = 42; method: REML)
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed                         factor 
## sigma^2.1  0.5484  0.7405     11     no                     short_cite 
## sigma^2.2  0.0000  0.0000     38     no         short_cite/same_infant 
## sigma^2.3  0.2502  0.5002     42     no  short_cite/same_infant/row_id 
## 
## Test for Residual Heterogeneity:
## QE(df = 40) = 285.0295, p-val < .0001
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 9.3995, p-val = 0.0022
## 
## Model Results:
## 
##           estimate      se     zval    pval    ci.lb    ci.ub 
## intrcpt    -1.2523  0.5271  -2.3760  0.0175  -2.2853  -0.2193   * 
## mean_age    0.0014  0.0004   3.0659  0.0022   0.0005   0.0022  ** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

ma_data %>% 
  mutate(age_months = mean_age/30.44) %>% 
  ggplot(aes(x = age_months, y = d_calc, size = n_1)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ylab("Effect Size") +
  xlab("Age (months)") +
  ggtitle("Verb Extension effect size vs. Age (months)")

Excluding Outlier

overall_mean <- mean(ma_data$d_calc)
overall_sd <- sd(ma_data$d_calc)

ma_data %>% 
  filter(d_calc > overall_mean + (2*(overall_sd)) | d_calc < overall_mean - (2*(overall_sd)) ) %>% 
  select(plot_label)

## # A tibble: 1 x 1
##   plot_label                                      
##   <chr>                                           
## 1 Abbot-Smith, Imai, Durrant,& Nurmsoo (2017) - 2h

No outlier Forest Plot

get_MA_params <- function(moderator, df) {
  
  this_data <- df
  n = nrow(this_data)
  
  if (moderator == TRUE){
      model <- rma.mv(d_calc ~ log(mean_age), V = d_var_calc,
                      random = ~ 1 | short_cite/same_infant/x_1,
                      method = "REML",
                      data = this_data)
      
      this_moderator_estimate <- model$b[2]
      this_moderator_estimate.cil <- model$ci.lb[2]
      this_moderator_estimate.cih <- model$ci.ub[2]
      this_moderator_z <- model$zval[2]
      this_moderator_p <- model$pval[2]

  } else{
      model <- rma.mv(d_calc, V = d_var_calc,
                      random = ~ 1 | short_cite/same_infant/x_1,
                      method = "REML",
                      data = this_data)
      
      this_moderator_estimate <- NA
      this_moderator_estimate.cil <- NA
      this_moderator_estimate.cih <- NA
      this_moderator_z <- NA
      this_moderator_p <- NA
    
  }
  
   params <- data.frame(this_moderator = moderator,
                       n = n,
                       estimate = model$b[1],
                       estimate.cil = model$ci.lb[1],
                       estimate.cih = model$ci.ub[1],
                       z = model$zval[1],
                       p = model$pval[1],
                       mod_estimate = this_moderator_estimate,
                       mod_estimate.cil = this_moderator_estimate.cil, 
                       mod_estimate.cih = this_moderator_estimate.cih,
                       moderator_z = this_moderator_z,
                       moderator_p = this_moderator_p,
                       Q = model$QE,
                       Qp = model$QEp)
  
  }

null_model<- get_MA_params(FALSE, ma_data_exclude_outlier)
#null_model
age_model <- get_MA_params(TRUE, ma_data_exclude_outlier)
all_models <- bind_rows(null_model,age_model)


mod_print <- all_models %>%
             mutate(esimate_print =  round(estimate, 2),
                    CI_print = paste0(" [", 
                                      round(estimate.cil, 2),
                                     ", ",
                                     round(estimate.cih, 2),
                                     "]"),
                   estimate_print_full = paste(esimate_print, CI_print),
                   z_print = round(z, 2),
                   p_print = round(p, 2),
                   p_print = ifelse(p_print <.001, "<.001", paste0("= ", p_print)),
                   mod_estimate_print = round(mod_estimate, 2),
                   mod_CI_print = paste0(" [", 
                                      round(mod_estimate.cil, 2),
                                     ", ",
                                     round(mod_estimate.cih, 2),
                                     "]"),
                   mod_estimate_print_full = paste(mod_estimate_print, mod_CI_print),

                   mod_z_print =  round(moderator_z, 2),
                   mod_p_print =  round(moderator_p, 2),
                   mod_p_print = ifelse(mod_p_print < .001, "<.001", 
                                        paste0("= ", mod_p_print)),
                   Q_print = round(Q, 2),
                   Qp_print = round(Qp, 2),
                   Qp_print = ifelse(Qp_print < .001, "<.001", paste0("= ", Qp_print)))

alpha = 0.05
individual_data <- ma_data_exclude_outlier %>% 
  select(short_cite, unique_id,d_calc,d_var_calc, n_1, plot_label,sentence_structure) %>% 
  mutate(cil = d_calc - (qnorm(alpha / 2, lower.tail = FALSE) * sqrt(d_var_calc)),
         cil = case_when(
          (cil < -8) ~ -8,  # truncate error bar for visibility reason 
          TRUE ~ cil
         ),
         ciu = d_calc +
               qnorm(alpha / 2, lower.tail = FALSE) * sqrt(d_var_calc), 
         meta = "no", 
         label_color = "black",
         print_full = paste(round(d_calc,2), " [",round(cil,2),", ",round(ciu,2), "]", sep = "")
         )

cumulative <- mod_print %>% 
  filter(this_moderator == FALSE) %>% 
  select (estimate, estimate.cil, estimate.cih) %>% 
  mutate(short_cite = "Meta-Analytic Effect Size",
         plot_label = "Meta-Analytic Effect Size",
         d_calc = estimate, 
         d_var_calc = NA, 
         n_1 = 99, 
         expt_num = "", 
         expt_condition = "",
         cil = estimate.cil, 
         ciu = estimate.cih, 
         sentence_structure = "cumulative",
        print_full = paste(round(d_calc,2), " [",round(cil,2),", ",round(ciu,2), "]", sep = ""),
         meta = "yes", 
        label_color = "red")

forest_data <- bind_rows(individual_data, cumulative) 
forest_data$sentence_structure <- as.factor(forest_data$sentence_structure)
forest_data$meta <- as.factor(forest_data$meta)
forest_data <- forest_data %>% 
  rowid_to_column() %>% 
  mutate(
    rowid = if_else(rowid == 0, 99, as.double(rowid)) #to always put the MA ES at bottom
  ) %>% 
  group_by(sentence_structure) %>% arrange(-rowid, .by_group = TRUE)
forest_data$plot_label <- factor(forest_data$plot_label, levels = forest_data$plot_label)



# set the neighbourhood levels in the order the occur in the data frame
label_colors <- forest_data$label_color[order(forest_data$plot_label)]

forest_data %>%  # First sort by val. This sort the dataframe but NOT the factor levels
  ggplot(aes(x = plot_label, y = d_calc)) + 
  geom_point(data = forest_data,
             aes(size=n_1, shape = sentence_structure, color = sentence_structure)) + 
  scale_color_manual(breaks = c("cumulative", "intransitive","transitive"),
                     values = c("red", "black", "black"))+ 
  scale_size(guide = 'none') + 
  scale_shape_manual(breaks = c("cumulative", "intransitive","transitive"),
                     values=c(18,16, 17)) +
  #guides(color = guide_legend(override.aes = list(shape = 18, shape = 16, shape = 17))) + 
  geom_linerange(aes(ymin = cil, ymax = ciu, color = sentence_structure), show.legend = FALSE) + 
  geom_segment(aes(x = plot_label, y = d_calc, xend = plot_label, yend = cil),
               linejoin = "round", 
               lineend = "round", 
               size = 0.1,
               arrow = arrow(length = unit(0.1, "inches")),
               data = filter(forest_data,cil == -8))+
  geom_hline(aes(yintercept = 0),  color = "gray44",linetype = 2) + 
  geom_hline(aes(yintercept = filter(forest_data, sentence_structure == "cumulative")$d_calc), 
             color = "red", linetype = 2) + 
  geom_text(aes(label = print_full, x = plot_label, y = 7), 
            size = 3.5, colour = label_colors) + 
  scale_y_continuous(breaks = seq(-10, 5, 1))+ 
  coord_cartesian(clip = 'on') + 
  coord_flip() + 
  ylab("Cohen's d") +
  labs(color  = "Effect Size Type",shape = "Effect Size Type") + # merge two legends 
  theme(text = element_text(size=18),
        legend.position="bottom",   
        plot.margin = unit(c(1,2,16,1), "lines"),
        legend.title = element_blank(),
        panel.background = element_blank(),
       #panel.background = element_rect(fill = "white", colour = "grey50"),
        axis.title.y = element_blank(),
        axis.text.y = element_text(colour = label_colors))

No outlier Regression Test

Funnel Plot No outlier

base_model <-  rma(d_calc,  d_var_calc,  
                         data=ma_data_exclude_outlier)

funnel(base_model)

No outlier Base Model Reg test

regtest(base_model, predictor = 'sei', ret.fit = T)

## 
## Regression Test for Funnel Plot Asymmetry
## 
## model:     mixed-effects meta-regression model
## predictor: standard error
## 
## Mixed-Effects Model (k = 41; tau^2 estimator: REML)
## 
## tau^2 (estimated amount of residual heterogeneity):     0.5671 (SE = 0.1474)
## tau (square root of estimated tau^2 value):             0.7531
## I^2 (residual heterogeneity / unaccounted variability): 88.84%
## H^2 (unaccounted variability / sampling variability):   8.96
## R^2 (amount of heterogeneity accounted for):            3.51%
## 
## Test for Residual Heterogeneity:
## QE(df = 39) = 240.0413, p-val < .0001
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 1.5312, p-val = 0.2159
## 
## Model Results:
## 
##          estimate      se     zval    pval    ci.lb   ci.ub 
## intrcpt   -0.3665  0.4529  -0.8094  0.4183  -1.2541  0.5211    
## sei        1.8839  1.5224   1.2374  0.2159  -1.1001  4.8678    
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## test for funnel plot asymmetry: z = 1.2374, p = 0.2159

No outlier Multi-level Model Regression Test

rma.mv(d_calc ~ sqrt(d_var_calc),  d_var_calc,  
                         random = ~ 1 | short_cite/same_infant/row_id, data=ma_data_exclude_outlier)

## 
## Multivariate Meta-Analysis Model (k = 41; method: REML)
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed                         factor 
## sigma^2.1  0.2363  0.4861     11     no                     short_cite 
## sigma^2.2  0.0000  0.0000     37     no         short_cite/same_infant 
## sigma^2.3  0.3102  0.5570     41     no  short_cite/same_infant/row_id 
## 
## Test for Residual Heterogeneity:
## QE(df = 39) = 240.0413, p-val < .0001
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 0.1777, p-val = 0.6734
## 
## Model Results:
## 
##                   estimate      se     zval    pval    ci.lb   ci.ub 
## intrcpt             0.3744  0.4745   0.7890  0.4301  -0.5556  1.3045    
## sqrt(d_var_calc)   -0.6843  1.6235  -0.4215  0.6734  -3.8664  2.4977    
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

No outlier Multi-level Model Regression Test with Age

rma.mv(d_calc ~ sqrt(d_var_calc) + mean_age,  d_var_calc,  
                         random = ~ 1 | short_cite/same_infant/row_id, data=ma_data_exclude_outlier)

## 
## Multivariate Meta-Analysis Model (k = 41; method: REML)
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed                         factor 
## sigma^2.1  0.4204  0.6484     11     no                     short_cite 
## sigma^2.2  0.0000  0.0000     37     no         short_cite/same_infant 
## sigma^2.3  0.2360  0.4858     41     no  short_cite/same_infant/row_id 
## 
## Test for Residual Heterogeneity:
## QE(df = 38) = 238.1674, p-val < .0001
## 
## Test of Moderators (coefficients 2:3):
## QM(df = 2) = 6.2329, p-val = 0.0443
## 
## Model Results:
## 
##                   estimate      se     zval    pval    ci.lb   ci.ub 
## intrcpt            -0.7324  0.7275  -1.0066  0.3141  -2.1583  0.6936    
## sqrt(d_var_calc)   -0.5323  1.6328  -0.3260  0.7444  -3.7325  2.6679    
## mean_age            0.0010  0.0004   2.3484  0.0189   0.0002  0.0018  * 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

No outlier PublicationBias Package

No outlier P-val Plot

pval_plot(ma_data_exclude_outlier$d_calc,
ma_data$d_var_calc, alpha.select = 0.05)

### Eta analysis {.tabset}

eta curve

eta <- data.frame()

for (i in 1:20){
  df <- corrected_meta(
      ma_data_exclude_outlier$d_calc,
      ma_data_exclude_outlier$d_var_calc,
      eta= i,
      clustervar = 1:length(ma_data_exclude_outlier$d_calc),
      model = "robust",
      selection.tails = 1, # if changed to 2, the curve is flat
      favor.positive = TRUE,# if changed to False, the curve is flat
      alpha.select = 0.05,
      CI.level = 0.95,
      small = TRUE)
  eta <- bind_rows(eta, df)
  
}
eta %>% ggplot(aes(x = eta, y = pval)) + 
  geom_point() + 
  geom_line() + 
  geom_hline(yintercept = 0.05, color = "red", type = 2) + 
  labs(title = "eta vs pval")

#### plot

eta.list = as.list( c( 200, 150, 100, 50, 40, 30, 20, rev( seq(1,15,1) ) ) )
res.list = lapply(eta.list, function(x){
cat("\n Working on eta = ", x)
return(corrected_meta(
      ma_data_exclude_outlier$d_calc,
      ma_data_exclude_outlier$d_var_calc,
      eta= x,
      clustervar = 1:length(ma_data_exclude_outlier$d_calc),
      model = "robust",
      selection.tails = 1,
      favor.positive = TRUE,
      alpha.select = 0.05,
      CI.level = 0.95,
      small = TRUE) )
})

## 
##  Working on eta =  200
##  Working on eta =  150
##  Working on eta =  100
##  Working on eta =  50
##  Working on eta =  40
##  Working on eta =  30
##  Working on eta =  20
##  Working on eta =  15
##  Working on eta =  14
##  Working on eta =  13
##  Working on eta =  12
##  Working on eta =  11
##  Working on eta =  10
##  Working on eta =  9
##  Working on eta =  8
##  Working on eta =  7
##  Working on eta =  6
##  Working on eta =  5
##  Working on eta =  4
##  Working on eta =  3
##  Working on eta =  2
##  Working on eta =  1

res.df = as.data.frame(do.call("rbind",res.list)) 

ggplot( data = res.df, aes( x = eta, y = est ) ) +
geom_ribbon( data = res.df, aes( x = eta, ymin = lo, ymax = hi ), fill = "gray" ) +
geom_line( lwd = 1.2 ) +
xlab( bquote( eta ) ) +
ylab( bquote( hat(mu)[eta] ) ) +
theme_classic()

significance_funnel

significance_funnel(
ma_data_exclude_outlier$d_calc,
ma_data_exclude_outlier$d_var_calc,
xmin = min(ma_data_exclude_outlier$d_calc),
xmax = max(ma_data_exclude_outlier$d_calc),
ymin = 0,
ymax = max(sqrt(ma_data_exclude_outlier$d_var_calc)),
xlab = "Point estimate",
ylab = "Estimated standard error",
favor.positive = TRUE,
est.all = NA,
est.N = NA,
alpha.select = 0.05,
plot.pooled = TRUE
)

Estimate the S_val

sval <- data.frame()

for (i in seq(0, 0.1, by = 0.02)){
  s <- svalue(
          ma_data_exclude_outlier$d_calc,
          ma_data_exclude_outlier$d_var_calc,
          q = i,
          clustervar = 1:length(ma_data_exclude_outlier$d_calc),
          model = "robust",
          alpha.select = 0.05,
          eta.grid.hi = 200,
          favor.positive = TRUE,
          CI.level = 0.95,
          small = TRUE,
          return.worst.meta = FALSE
          )
  s <- s %>% mutate(q_val = i)

  sval <- bind_rows(sval, s) 
}

sval

##   sval.est sval.ci k.affirmative k.nonaffirmative signs.recoded q_val
## 1 2.005466      --            15               26         FALSE  0.00
## 2 1.825846      --            15               26         FALSE  0.02
## 3 1.669893      --            15               26         FALSE  0.04
## 4 1.533245      --            15               26         FALSE  0.06
## 5 1.412483      --            15               26         FALSE  0.08
## 6 1.304994      --            15               26         FALSE  0.10

Null and age model

null <- rma.mv(d_calc, V = d_var_calc,
                      random = ~ 1 | short_cite/same_infant/row_id,
                      method = "REML",
                      data = ma_data_exclude_outlier)
age <- rma.mv(d_calc ~ mean_age, V = d_var_calc,
                      random = ~ 1 | short_cite/same_infant/row_id,
                      method = "REML",
                      data = ma_data_exclude_outlier)
age

## 
## Multivariate Meta-Analysis Model (k = 41; method: REML)
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed                         factor 
## sigma^2.1  0.4016  0.6337     11     no                     short_cite 
## sigma^2.2  0.0000  0.0000     37     no         short_cite/same_infant 
## sigma^2.3  0.2291  0.4786     41     no  short_cite/same_infant/row_id 
## 
## Test for Residual Heterogeneity:
## QE(df = 39) = 251.1060, p-val < .0001
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 6.1495, p-val = 0.0131
## 
## Model Results:
## 
##           estimate      se     zval    pval    ci.lb   ci.ub 
## intrcpt    -0.8924  0.4860  -1.8361  0.0663  -1.8450  0.0602  . 
## mean_age    0.0010  0.0004   2.4798  0.0131   0.0002  0.0018  * 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

ma_data_exclude_outlier %>% 
  mutate(age_months = mean_age/30.44) %>% 
  ggplot(aes(x = age_months, y = d_calc, size = n_1)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ylab("Effect Size") +
  xlab("Age (months)") +
  ggtitle("Verb Extension effect size vs. Age (months)")

explore stimuli contrast

sc <- rma.mv(d_calc ~ stimuli_contrast, V = d_var_calc,
                      random = ~ 1 | short_cite/same_infant/row_id,
                      method = "REML",
                      data = ma_data_exclude_outlier)

sc

## 
## Multivariate Meta-Analysis Model (k = 41; method: REML)
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed                         factor 
## sigma^2.1  0.1014  0.3185     11     no                     short_cite 
## sigma^2.2  0.0000  0.0000     37     no         short_cite/same_infant 
## sigma^2.3  0.3149  0.5612     41     no  short_cite/same_infant/row_id 
## 
## Test for Residual Heterogeneity:
## QE(df = 38) = 200.9824, p-val < .0001
## 
## Test of Moderators (coefficients 2:3):
## QM(df = 2) = 6.6321, p-val = 0.0363
## 
## Model Results:
## 
##                                estimate      se     zval    pval    ci.lb 
## intrcpt                          0.8869  0.3071   2.8880  0.0039   0.2850 
## stimuli_contrastno_change       -0.9450  0.5403  -1.7492  0.0802  -2.0039 
## stimuli_contrastobject_change   -0.8830  0.3520  -2.5088  0.0121  -1.5728 
##                                  ci.ub 
## intrcpt                         1.4889  ** 
## stimuli_contrastno_change       0.1138   . 
## stimuli_contrastobject_change  -0.1932   * 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

ma_data_exclude_outlier %>% 
  mutate(age_months = mean_age/30.44) %>% 
  ggplot(aes(x = age_months, y = d_calc, color = stimuli_contrast)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ylab("Effect Size") +
  xlab("Age (months)") +
  ggtitle("Verb Extension effect size vs. Age (months)")

ma_data_object_change <- ma_data %>% 
                           filter(stimuli_contrast == "object_change") 

oc_only <- rma.mv(d_calc, V = d_var_calc,
                      random = ~ 1 | short_cite/same_infant/row_id,
                      method = "REML",
                      data = ma_data_object_change)

oc_age <- rma.mv(d_calc ~ mean_age, V = d_var_calc,
                      random = ~ 1 | short_cite/same_infant/row_id,
                      method = "REML",
                      data = ma_data_object_change)

summary(oc_only)

## 
## Multivariate Meta-Analysis Model (k = 27; method: REML)
## 
##   logLik  Deviance       AIC       BIC      AICc 
## -44.5396   89.0791   97.0791  102.1115   98.9839   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed                         factor 
## sigma^2.1  0.1381  0.3716      8     no                     short_cite 
## sigma^2.2  0.2694  0.5190     27     no         short_cite/same_infant 
## sigma^2.3  0.2694  0.5190     27     no  short_cite/same_infant/row_id 
## 
## Test for Heterogeneity:
## Q(df = 26) = 191.6098, p-val < .0001
## 
## Model Results:
## 
## estimate      se     zval    pval    ci.lb   ci.ub 
##  -0.0475  0.2094  -0.2270  0.8204  -0.4579  0.3628    
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(oc_age)

## 
## Multivariate Meta-Analysis Model (k = 27; method: REML)
## 
##   logLik  Deviance       AIC       BIC      AICc 
## -41.3534   82.7068   92.7068   98.8012   95.8647   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed                         factor 
## sigma^2.1  0.7506  0.8664      8     no                     short_cite 
## sigma^2.2  0.1570  0.3962     27     no         short_cite/same_infant 
## sigma^2.3  0.1570  0.3962     27     no  short_cite/same_infant/row_id 
## 
## Test for Residual Heterogeneity:
## QE(df = 25) = 190.2205, p-val < .0001
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 9.4321, p-val = 0.0021
## 
## Model Results:
## 
##           estimate      se     zval    pval    ci.lb    ci.ub 
## intrcpt    -1.6512  0.6423  -2.5710  0.0101  -2.9100  -0.3924   * 
## mean_age    0.0016  0.0005   3.0712  0.0021   0.0006   0.0026  ** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

reg test with object_change, null model

publication bias yes:

rma.mv(d_calc ~ sqrt(d_var_calc),  d_var_calc,  
                         random = ~ 1 | short_cite/same_infant/row_id, data=ma_data_object_change)

## 
## Multivariate Meta-Analysis Model (k = 27; method: REML)
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed                         factor 
## sigma^2.1  0.1162  0.3409      8     no                     short_cite 
## sigma^2.2  0.1403  0.3746     27     no         short_cite/same_infant 
## sigma^2.3  0.1403  0.3746     27     no  short_cite/same_infant/row_id 
## 
## Test for Residual Heterogeneity:
## QE(df = 25) = 140.9728, p-val < .0001
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 37.8219, p-val < .0001
## 
## Model Results:
## 
##                   estimate      se     zval    pval    ci.lb    ci.ub 
## intrcpt             1.6922  0.3265   5.1824  <.0001   1.0522   2.3321  *** 
## sqrt(d_var_calc)   -6.6026  1.0736  -6.1499  <.0001  -8.7069  -4.4984  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

reg test with object_change, with age

rma.mv(d_calc ~ sqrt(d_var_calc) + mean_age,  d_var_calc,  
                         random = ~ 1 | short_cite/same_infant/row_id, data=ma_data_object_change)

## 
## Multivariate Meta-Analysis Model (k = 27; method: REML)
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed                         factor 
## sigma^2.1  0.2698  0.5194      8     no                     short_cite 
## sigma^2.2  0.1141  0.3378     27     no         short_cite/same_infant 
## sigma^2.3  0.1141  0.3378     27     no  short_cite/same_infant/row_id 
## 
## Test for Residual Heterogeneity:
## QE(df = 24) = 140.9529, p-val < .0001
## 
## Test of Moderators (coefficients 2:3):
## QM(df = 2) = 41.3996, p-val < .0001
## 
## Model Results:
## 
##                   estimate      se     zval    pval    ci.lb    ci.ub 
## intrcpt             1.0364  0.6067   1.7082  0.0876  -0.1528   2.2256    . 
## sqrt(d_var_calc)   -6.3867  1.0861  -5.8804  <.0001  -8.5155  -4.2580  *** 
## mean_age            0.0006  0.0004   1.4025  0.1608  -0.0002   0.0014      
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Verb_Extension_Explore

Anjie Cao and Molly Lewis

8/24/2020

All Data

Forest Plot

Regression Test

Funnel Plot

Base Model Regression Test

Multi-level Model Regression Test

PublicationBias Package

P-val Plot

eta curve

significance_funnel

Estimate the S_val

null and age model

Excluding Outlier

No outlier Forest Plot

No outlier Regression Test

Funnel Plot No outlier

No outlier Base Model Reg test

No outlier Multi-level Model Regression Test

No outlier Multi-level Model Regression Test with Age

No outlier PublicationBias Package

No outlier P-val Plot

eta curve

significance_funnel

Estimate the S_val

Null and age model

explore stimuli contrast

reg test with object_change, null model

reg test with object_change, with age