Get single predictor model

ma_data <- read_csv(DATA_PATH)
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   se = col_double(),
##   mean_age = col_double(),
##   productive_vocab_mean = col_double(),
##   productive_vocab_median = col_double(),
##   n_train_test_pair = col_double(),
##   n_test_trial_per_pair = col_double(),
##   n_repetitions_sentence = col_double(),
##   n_repetitions_video = col_double(),
##   inclusion_certainty = col_double(),
##   n_1 = col_double(),
##   x_1 = col_double(),
##   x_2 = col_double(),
##   x_2_raw = col_double(),
##   sd_1 = col_double(),
##   sd_2 = col_double(),
##   sd_2_raw = col_double(),
##   t = col_double(),
##   d = col_double(),
##   d_calc = col_double(),
##   d_var_calc = col_double()
##   # ... with 1 more columns
## )
## See spec(...) for full column specifications.
MODERATORS <- c( "NULL", "mean_age","productive_vocab_median", "sentence_structure", "agent_argument_type", "patient_argument_type","agent_argument_number", "n_repetitions_sentence", "n_repetitions_video", "stimuli_modality", "stimuli_actor", "transitive_event_type","intransitive_event_type", "visual_stimuli_pair", "test_method","presentation_type","character_identification", "practice_phase", "test_mass_or_distributed", "n_train_test_pair", "n_test_trial_per_pair" )
mod_print <- generate_moderator_df(MODERATORS, ma_data)
## Warning in rma.mv(d_calc ~ productive_vocab_median, V = d_var_calc, random = ~1
## | : Rows with NAs omitted from model fitting.

for theoretical moderators

THEORETICAL_MODERATORS <- c("mean_age","productive_vocab_median", "sentence_structure", "agent_argument_type")

theoretical_df <- mod_print %>%
  filter(this_moderator %in% THEORETICAL_MODERATORS) %>% 
  select(this_moderator, mod_estimate, mod_estimate.cih, mod_estimate.cil) %>% 
  mutate(model = "single", 
         type = "theoretical")

for methodological moderators

METHODOLOGICAL_MODERATORS <- c("character_identification", "practice_phase", "test_mass_or_distributed","n_repetitions_sentence") # special treatment w/ presentation type because it has three levels

methodological_df <-  mod_print %>%
  filter(this_moderator %in% METHODOLOGICAL_MODERATORS) %>% 
  select(this_moderator, mod_estimate, mod_estimate.cih, mod_estimate.cil) %>% 
  mutate(model = "single", 
         type = "methodological")

# special treatment for presentation_type 
synchronicity_model <- rma.mv(d_calc ~ presentation_type, V = d_var_calc,
                    random = ~ 1 | short_cite/same_infant/x_1,
                     method = "REML",
                     data = ma_data) 

moderator <- find_parameters(synchronicity_model) %>% as.data.frame()

this_moderator_estimate <- synchronicity_model$b
this_moderator_SE <- synchronicity_model$se
this_moderator_estimate.cil <- synchronicity_model$ci.lb
this_moderator_estimate.cih <- synchronicity_model$ci.ub
this_moderator_z <- synchronicity_model$zval
this_moderator_p <- synchronicity_model$pval

synchronicity_info <- bind_cols(moderator, this_moderator_estimate, this_moderator_SE, 
          this_moderator_estimate.cil, this_moderator_estimate.cih, 
          this_moderator_z, this_moderator_p)

colnames(synchronicity_info) <- c("this_moderator", "mod_estimate", "this_moderator_SE", "mod_estimate.cil", "mod_estimate.cih", "this_moderator_z", "this_moderator_p")

synchronicity_info <- synchronicity_info %>% 
  filter(this_moderator != "(Intercept)") %>% 
  select(this_moderator, mod_estimate, mod_estimate.cih, mod_estimate.cil) %>% 
  mutate(model = "single", 
         type = "methodological")

methodological_df <-  mod_print %>%
  filter(this_moderator %in% METHODOLOGICAL_MODERATORS) %>% 
  select(this_moderator, mod_estimate, mod_estimate.cih, mod_estimate.cil) %>% 
  mutate(model = "single", 
         type = "methodological") %>% 
  bind_rows(synchronicity_info)

Get mega model info

for theoretical moderators

theoretical_mod <- rma.mv(d_calc ~ sentence_structure + agent_argument_type + mean_age + productive_vocab_median, V = d_var_calc,
                    random = ~ 1 | short_cite/same_infant/x_1,
                     method = "REML",
                     data = ma_data) 
## Warning in rma.mv(d_calc ~ sentence_structure + agent_argument_type + mean_age
## + : Rows with NAs omitted from model fitting.
theoretical_mod_print <- generate_mega_model_df(theoretical_mod)

theoretical_mega_df <- theoretical_mod_print %>% 
  filter(moderator_name != "intrcpt") %>% 
  mutate(
    this_moderator = case_when(
      moderator_name == "sentence_structuretransitive" ~ "sentence_structure", 
      moderator_name == "agent_argument_typepronoun" ~ "agent_argument_type", 
      TRUE ~ moderator_name
    ),
    mod_estimate.cih = model_ci_ub, 
    mod_estimate.cil = model_ci_lb
  )  %>% 
  select(this_moderator, mod_estimate, mod_estimate.cih, mod_estimate.cil) %>% 
  mutate(model = "full", 
          type = "theoretical")

for methodological moderators

methodological_mod <- rma.mv(d_calc ~ character_identification + practice_phase + presentation_type + test_mass_or_distributed + n_repetitions_sentence,
                             V = d_var_calc,
                    random = ~ 1 | short_cite/same_infant/x_1,
                     method = "REML",
                     data = ma_data)



methodological_mod_print <- generate_mega_model_df(methodological_mod)

methodological_mega_df <- methodological_mod_print %>% 
  filter(moderator_name != "intrcpt") %>% 
  mutate(
    this_moderator = case_when(
      moderator_name == "character_identificationyes" ~ "character_identification", 
      moderator_name == "practice_phaseyes" ~ "practice_phase",
      moderator_name == "test_mass_or_distributedmass" ~ "test_mass_or_distributed",
      TRUE ~ moderator_name
    ),
    mod_estimate.cih = model_ci_ub, 
    mod_estimate.cil = model_ci_lb
  )  %>% 
  select(this_moderator, mod_estimate, mod_estimate.cih, mod_estimate.cil) %>% 
  mutate(model = "full", 
         type = "methodological")

Combine df

methodological_all_df <- bind_rows(methodological_df, methodological_mega_df)
theoretical_all_df <- bind_rows(theoretical_df, theoretical_mega_df)

Generate plot for theoretical moderators

theoretical_all_df %>% 
  ggplot(aes(x = fct_reorder(this_moderator, -mod_estimate), color = model, 
             y = mod_estimate, 
             ymin = mod_estimate.cil, 
             ymax = mod_estimate.cih, 
             group = model)) + 
  geom_pointrange(size = 0.5) + 
  scale_color_manual(values = c("grey", "red")) + 
  geom_line(data = theoretical_all_df %>% filter(model == "full"), 
            aes(x = fct_reorder(this_moderator, -mod_estimate),
                y = mod_estimate, 
                group = model), 
            color = "grey", 
            size = 0.5) + 
  coord_flip()

Generate plot for methodological moderators

methodological_all_df %>% 
    ggplot(aes(x = fct_reorder(this_moderator, -mod_estimate), color = model, 
             y = mod_estimate, 
             ymin = mod_estimate.cil, 
             ymax = mod_estimate.cih, 
             group = model)) + 
  geom_pointrange(size = 0.5) + 
  scale_color_manual(values = c("grey", "red")) + 
  geom_line(data = methodological_all_df %>% filter(model == "full"), 
            aes(x = fct_reorder(this_moderator, -mod_estimate),
                y = mod_estimate, 
                group = model), 
            color = "grey", 
            size = 0.5) + 
  coord_flip()

plot predicted values

vocab_model <- rma.mv(d_calc ~ productive_vocab_median, V = d_var_calc,
                    random = ~ 1 | short_cite/same_infant/x_1,
                     method = "REML",
                     data = ma_data) 
## Warning in rma.mv(d_calc ~ productive_vocab_median, V = d_var_calc, random = ~1
## | : Rows with NAs omitted from model fitting.
age_model <- rma.mv(d_calc ~ mean_age, V = d_var_calc,
                    random = ~ 1 | short_cite/same_infant/x_1,
                     method = "REML",
                     data = filter(ma_data, !is.na(productive_vocab_median))) 

both_model <- rma.mv(d_calc ~ mean_age + productive_vocab_median, V = d_var_calc,
                    random = ~ 1 | short_cite/same_infant/x_1,
                     method = "REML",
                     data = filter(ma_data, !is.na(productive_vocab_median))) 


predicted_val_vocab <- predict(vocab_model, addx = TRUE) %>% 
  as.data.frame() %>% 
  mutate(productive_vocab_median = X.productive_vocab_median, 
         predicted_value = pred) %>% 
  select(productive_vocab_median, predicted_value)

# how do I know if these two dataframes are organized in the same order  
predicted_val_age <- predict(age_model, addx = TRUE) %>% 
  as.data.frame() %>% 
  mutate(mean_age = X.mean_age, 
         predicted_value = pred) %>% 
  select(mean_age, predicted_value)

predicted_val_both <- predict(both_model, addx = TRUE) %>% 
  as.data.frame() %>% 
  mutate(productive_vocab_median = X.productive_vocab_median, 
         predicted_value = pred) %>% 
  select(productive_vocab_median, predicted_value)
predicted_val_vocab %>% 
  ggplot(aes(x = productive_vocab_median, y = predicted_value)) + 
  geom_point()

weird looking predicted values when using two moderators plot

predicted_val_both %>% 
  ggplot(aes(x = productive_vocab_median, y = predicted_value)) + 
  geom_point()

ma_data %>% 
  ggplot(aes(x = productive_vocab_median, y = d_calc)) + 
  geom_point() + 
  geom_line(
    data = predicted_val_vocab, 
    aes(x = productive_vocab_median, y = predicted_value)) + 
  geom_line(
    data = predicted_val_both, 
    aes(x = productive_vocab_median, y = predicted_value)) # werid looking line comes from the two predictors model 
## Warning: Removed 28 rows containing missing values (geom_point).

what’s wrong with funnel plot?

Is there a difference between two types of funnel plot? Fancier:

mixed_model <- rma.mv(d_calc ~ 1,  d_var_calc,  
                   random = ~ 1 | short_cite/same_infant/x_1, 
                   method = "REML", 
                   data=ma_data)
funnel(mixed_model)

basic:

basic_model <- rma(d_calc ~ 1,  d_var_calc,  
                   data=ma_data)
funnel(basic_model)

ok they defintiely look too similar, excerpt from r documents:

For fixed- and random-effects models (i.e., models not involving moderators), the plot shows the individual observed effect sizes or outcomes on the x-axis against the corresponding standard errors (i.e., the square root of the sampling variances) on the y-axis.

so maybe they are the same? Is there a way to take into account the random effects in the funnel? the predict function does give me what is needed:

predict(mixed_model)
## 
##    pred     se  ci.lb  ci.ub   cr.lb  cr.ub 
##  0.2465 0.1001 0.0504 0.4427 -0.6626 1.1556