library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tibble  3.1.0     ✓ dplyr   1.0.5
## ✓ tidyr   1.1.1     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(here)
## here() starts at /Users/caoanjie/Desktop/projects/SyntacticBootstrappingMA
library(metafor)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Loading 'metafor' package (version 2.4-0). For an overview 
## and introduction to the package please type: help(metafor).
d <- read_csv(here("data/processed/syntactic_bootstrapping_tidy_data.csv"))
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   se = col_double(),
##   mean_age = col_double(),
##   productive_vocab_mean = col_double(),
##   productive_vocab_median = col_double(),
##   n_train_test_pair = col_double(),
##   n_test_trial_per_pair = col_double(),
##   n_repetitions_sentence = col_double(),
##   n_repetitions_video = col_double(),
##   inclusion_certainty = col_double(),
##   unique_infant = col_logical(),
##   test_method = col_logical(),
##   publication_year = col_double(),
##   n_1 = col_double(),
##   x_1 = col_double(),
##   x_2 = col_double(),
##   x_2_raw = col_double(),
##   sd_1 = col_double(),
##   sd_2 = col_double(),
##   sd_2_raw = col_double(),
##   t = col_double()
##   # ... with 5 more columns
## )
## See spec(...) for full column specifications.
alt_d <- read_csv(here("exploratory_analyses/07_PA/alt_ES.csv"))
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   n_1 = col_double(),
##   n_2 = col_double(),
##   x_1 = col_double(),
##   x_2 = col_double(),
##   x_2_raw = col_double(),
##   sd_1 = col_double(),
##   sd_2 = col_double(),
##   sd_2_raw = col_double(),
##   t = col_double(),
##   d = col_double(),
##   d_calc = col_logical()
## )
## See spec(...) for full column specifications.

comparison

alt_id <- alt_d %>% 
  filter(alternative_calc == "between") %>% 
  select(unique_id) %>% 
  pull

ma_subset <- d %>% 
  filter(unique_id %in% alt_id) %>% 
  filter(sentence_structure == "transitive") %>% 
  select(unique_id, short_cite, 
         same_infant, plot_label, n_1, d_calc, d_var_calc, row_id) %>% 
  mutate(calc_type = "within") %>% 
  drop_na()

alt_subset <- alt_d %>% 
  filter(unique_id %in% alt_id) %>% 
  rowid_to_column() %>% 
  mutate(pooled_SD = sqrt(((n_1 - 1) * sd_1 ^ 2 + (n_2 - 1) * sd_2 ^ 2) / (n_1 + n_2 - 2)), 
    d_calc = (x_1 - x_2) / pooled_SD, 
    d_var_calc = ((n_1 + n_2) / (n_1 * n_2)) + (d_calc ^ 2 / (2 * (n_1 + n_2)))) %>% 
  rename(row_id = rowid) %>% 
  select(unique_id, short_cite, 
         same_infant, plot_label,n_1, d_calc, d_var_calc, row_id) %>% 
  mutate(calc_type = "between")%>% 
  drop_na()

comparison_df <- bind_rows(ma_subset, 
                           alt_subset)

comparison_model <- rma.mv(d_calc ~ calc_type,
                             V = d_var_calc,
                    random = ~ 1 | short_cite/same_infant/row_id,
                     method = "REML",
                     data = comparison_df)
comparison_model
## 
## Multivariate Meta-Analysis Model (k = 40; method: REML)
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed                         factor 
## sigma^2.1  0.1443  0.3799      9     no                     short_cite 
## sigma^2.2  0.0000  0.0000     37     no         short_cite/same_infant 
## sigma^2.3  0.0401  0.2003     39     no  short_cite/same_infant/row_id 
## 
## Test for Residual Heterogeneity:
## QE(df = 38) = 93.4110, p-val < .0001
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 6.1853, p-val = 0.0129
## 
## Model Results:
## 
##                  estimate      se     zval    pval    ci.lb    ci.ub 
## intrcpt            0.8864  0.1697   5.2219  <.0001   0.5537   1.2190  *** 
## calc_typewithin   -0.3394  0.1365  -2.4870  0.0129  -0.6068  -0.0719    * 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

power analysis

library(metapower)
null_model <- rma(yi = d_calc, 
                    vi = d_var_calc,
                    random = ~ 1 | short_cite/same_infant/row_id,
                     method = "REML",
                     data = d )
## Warning: Extra argument ('random') disregarded.
i2 <- null_model$I2

null_model
## 
## Random-Effects Model (k = 60; tau^2 estimator: REML)
## 
## tau^2 (estimated amount of total heterogeneity): 0.2163 (SE = 0.0573)
## tau (square root of estimated tau^2 value):      0.4651
## I^2 (total heterogeneity / total variability):   72.48%
## H^2 (total variability / sampling variability):  3.63
## 
## Test for Heterogeneity:
## Q(df = 59) = 196.0659, p-val < .0001
## 
## Model Results:
## 
## estimate      se    zval    pval   ci.lb   ci.ub 
##   0.2748  0.0725  3.7913  0.0001  0.1327  0.4169  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
i2
## [1] 72.4763
mpower(
effect_size = null_model$beta %>% as.numeric(),
study_size = d %>% select(n_1) %>% pull() %>% mean(),#questionable?
k = nrow(d),
i2 = i2/100,
es_type = "d",
test_type = "two-tailed",
p = 0.05,
con_table = NULL
) %>% plot_mpower()

moderator analysis

transitive_model <- rma(d_calc,
                             vi = d_var_calc,
                    random = ~ 1 | short_cite/same_infant/row_id,
                     method = "REML",
                     data = filter(d, sentence_structure == "transitive") )
## Warning: Extra argument ('random') disregarded.
intransitive_model <- rma(d_calc,
                             vi = d_var_calc,
                    random = ~ 1 | short_cite/same_infant/row_id,
                     method = "REML",
                     data = filter(d, sentence_structure == "intransitive") )
## Warning: Extra argument ('random') disregarded.
n_study_transitive <- d %>% filter( sentence_structure == "transitive") %>% distinct(plot_label) %>% count() %>% pull()
n_study_intransitive <- d %>% filter( sentence_structure == "intransitive") %>% distinct(plot_label) %>% count() %>% pull()


mod_power(
n_groups = 2,
effect_sizes = c(transitive_model$beta %>% as.numeric(),
                 intransitive_model$beta %>% as.numeric()),
study_size =  n_study_transitive + n_study_intransitive,
k = n_study_transitive,#they are equal 
i2 = 0.75,# i2/100, the paper is recommending to use a value like this because this is an average / rule of thumb thing, pg31
es_type = "d",
p = 0.05,
con_table = NULL
) %>% plot_mod_power()