library(tidyverse)
library(googlesheets4)
library(metafor)
library(knitr)
library(gridExtra)
source("calculate_es_bootstrapping.R")
source("tidy_data_bootstrapping.R")
GOOGLE_SHEET_ID <- "1kSL5lpmHcaw9lOp2dhJ_RH15REFe7oZVwMO1vJc930o"

read_raw_data_and_clean("1kSL5lpmHcaw9lOp2dhJ_RH15REFe7oZVwMO1vJc930o")
write_tidy_sheet("1kSL5lpmHcaw9lOp2dhJ_RH15REFe7oZVwMO1vJc930o")
ma_data <- read_sheet(MA_DATA_GOOGLE_SHEET_ID, "MA data tidy with ES NEW")


ma_data <- ma_data %>% filter(!is.na(d_calc)) %>% mutate(practice_phase = ifelse(practice_phase == "NA","no",practice_phase),
                                                         character_identification = ifelse(character_identification == "NA", "no", character_identification),
                                                         presentation_type = ifelse(presentation_type == "immediate-after", "immediate_after", ifelse(presentation_type == "Immediate-after", "immediate_after", presentation_type)),
                                                         patient_argument_type = ifelse(patient_argument_type == "N/A", "NA", patient_argument_type),
                                                         population_type = ifelse(population_type == "typical_developing", "typically_developing", population_type),
                                                         agent_argument_type = ifelse(agent_argument_type == "pronoun_and_noun" | agent_argument_type == "pronoung_and_noun", "noun_and_pronoun", agent_argument_type),
                                                         test_type = ifelse(test_type == "actor", "agent", test_type))
ma_data %>% filter(inclusion_certainty == 2)-> ma_data

Data Overview

number of differnet effectsizes
number of conditions included

N_effect_sizes <- length((!is.na(ma_data$d_calc))&!(ma_data$d_calc ==0))
N_papers <- length(unique(ma_data$unique_id))

There are 51 effect sizes collected from 12 different papers.

Variable Summary

distribution of sample size

summary(ma_data$n_1)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    8.00    8.00   12.00   13.69   16.50   24.00

ma_data %>% 
  ggplot(aes(x = n_1)) + 
  geom_histogram() + 
  geom_vline(xintercept =mean(ma_data$n_1)) +
  stat_bin(binwidth = 2) + 
  labs(title = "Distribution of sample size")

distribtuion of x_1

summary(ma_data$x_1)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0800  0.4976  0.5600  0.5469  0.6350  0.7300

ma_data %>% 
  ggplot(aes(x = x_1)) + 
  geom_histogram() + 
  stat_bin(binwidth = 0.05) + 
  geom_vline(xintercept =mean(ma_data$x_1))+
  labs(title = "Distribution of x_1")

- distribtuion of sd_1

summary(ma_data$SD_1)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0200  0.0750  0.1400  0.1714  0.2000  0.4900

ma_data %>% 
  ggplot(aes(x =SD_1 )) + 
  geom_histogram() + 
  stat_bin(binwidth = 0.01) + 
  geom_vline(xintercept =mean(ma_data$SD_1))+
  labs(title = "Distribution of sd_1")

distribution of d_calc

summary(ma_data$d_calc)

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -2.80000 -0.02427  0.45000  0.69861  1.26707  4.11842

ma_data %>% 
  ggplot(aes(x =d_calc )) + 
  geom_histogram() + 
  geom_vline(xintercept = mean(ma_data$d_calc))+
  labs(title = "Distribution of d_calc")

summary for all categorical variables

ma_data %>% group_by(sentence_structure) %>% 
  summarize(count = n()) %>% ggplot(aes(x=sentence_structure, y=count)) +     
  geom_col(position = 'dodge',width=0.4) + 
  theme(text = element_text(size=8)) ->p1

ma_data %>% group_by(language) %>% 
  summarize(count = n()) %>% ggplot(aes(x=language, y=count)) +     
  geom_col(position = 'dodge',width=0.4) + 
  theme(text = element_text(size=8)) ->p2

ma_data %>% group_by(population_type) %>% 
  summarize(count = n()) %>% ggplot(aes(x=population_type, y=count)) +     
  geom_col(position = 'dodge',width=0.4) + 
  theme(text = element_text(size=8))->p3

ma_data %>% group_by(agent_argument_type) %>% 
  summarize(count = n()) %>% ggplot(aes(x=agent_argument_type, y=count)) +     
  geom_col(position = 'dodge',width=0.4) + 
  theme(text = element_text(size=8))->p4

ma_data %>% group_by(patient_argument_type) %>% 
  summarize(count = n()) %>% ggplot(aes(x=patient_argument_type , y=count)) +     
  geom_col(position = 'dodge',width=0.4) + 
  theme(text = element_text(size=8))->p5

ma_data %>% group_by(stimuli_type) %>% 
  summarize(count = n()) %>% ggplot(aes(x=stimuli_type  , y=count)) +     
  geom_col(position = 'dodge',width=0.4) + 
  theme(text = element_text(size=8))->p6

ma_data %>% group_by(stimuli_modality) %>% 
  summarize(count = n()) %>% ggplot(aes(x=stimuli_modality, y=count)) +     
  geom_col(position = 'dodge',width=0.4) + 
  theme(text = element_text(size=8))->p7

ma_data %>% group_by(stimuli_actor) %>% 
  summarize(count = n()) %>% ggplot(aes(x=stimuli_actor, y=count)) +     
  geom_col(position = 'dodge',width=0.4) + 
  theme(text = element_text(size=8))->p8

ma_data %>% group_by(presentation_type) %>% 
  summarize(count = n()) %>% ggplot(aes(x=presentation_type, y=count)) +     
  geom_col(position = 'dodge',width=0.4) + 
  theme(text = element_text(size=8))->p9

ma_data %>% group_by(character_identification) %>% 
  summarize(count = n()) %>% ggplot(aes(x=character_identification, y=count)) +     
  geom_col(position = 'dodge',width=0.4) + 
  theme(text = element_text(size=8))->p10

ma_data %>% group_by(practice_phase) %>% 
  summarize(count = n()) %>% ggplot(aes(x=practice_phase, y=count)) +     
  geom_col(position = 'dodge',width=0.4) + 
  theme(text = element_text(size=8))->p11

ma_data %>% group_by(test_mass_or_distributed) %>% 
  summarize(count = n()) %>% ggplot(aes(x=test_mass_or_distributed, y=count)) +     
  geom_col(position = 'dodge',width=0.4) + 
  theme(text = element_text(size=8))->p12

margin = theme(plot.margin = unit(c(2,2,2,2), "cm"))
grid.arrange(p1, p2, p3, p4,p5,p6,p7,p8,p9,p10,p11,p12, nrow = 3)

age

age only

# all ages (days)

ma_data %>% 
ggplot(aes(x = mean_age, y = d_calc, size = n_1)) +
  geom_point() +
  ylab("Effect Size") +
  xlab("Age (days)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (days)") +
  theme_classic() +
  theme(legend.position = "none")

# all ages (months)

ggplot(ma_data, aes(x = (mean_age/30.44), 
                                 y = d_calc, 
                                 size = n_1)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ylab("Effect Size") +
  xlab("Age (months)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (months)") +
  theme_classic() +
  theme(legend.position = "none")

# Younger than 50 months 

ma_data %>% filter(mean_age <= 50*30.44) %>% 
  ggplot( mapping = aes(x = mean_age, y = d_calc, size = n_1)) +
  geom_point() +
  ylab("Effect Size") +
  xlab("Age (days)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (days), younger than 50 months") +
  theme_classic() +
  theme(legend.position = "none")

ma_data %>% filter(mean_age <= 50*30.44) %>% 
  ggplot(mapping = aes(x = mean_age/30.44, y = d_calc, size = n_1)) +
  geom_point() +
  ylab("Effect Size") +
  xlab("Age (months)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (months), younger than 50 months") +
  theme_classic() +
  theme(legend.position = "none")

# younger than 36 months
  
ma_data %>% filter(mean_age <= 36*30.44) %>% 
  ggplot(mapping = aes(x = mean_age, y = d_calc, size = n_1)) +
  geom_point() +
  ylab("Effect Size") +
  xlab("Age (days)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (days), younger than 36 months") +
  theme_classic() +
  theme(legend.position = "none")

ma_data %>% filter(mean_age <= 36*30.44) %>% 
  ggplot(mapping = aes(x = mean_age/30.44, y = d_calc, size = n_1)) +
  geom_point() +
  ylab("Effect Size") +
  xlab("Age (months)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (months), younger than 36 months") +
  theme_classic() +
  theme(legend.position = "none")

## age and sentence structure

ma_data_no_bare_verb <- ma_data %>% filter(sentence_structure != "bare_verb")

# all ages (days)
ggplot(data = ma_data_no_bare_verb, aes(x = mean_age, y = d_calc, color = sentence_structure)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ylab("Effect Size") +
  xlab("Age (days)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (days)") +
  theme_classic()

# all ages (months)

ggplot(data = ma_data_no_bare_verb, aes(x = (mean_age/30.44), y = d_calc, color = sentence_structure)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ylab("Effect Size") +
  xlab("Age (days)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (days)") +
  theme_classic()

# Younger than 50 months 
ma_data_no_bare_verb_less_50 <- ma_data_no_bare_verb %>% filter(mean_age < 50*30.44 |mean_age==50*30.44)

ggplot(data = ma_data_no_bare_verb_less_50, mapping = aes(x = mean_age, y = d_calc, color = sentence_structure)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ylab("Effect Size") +
  xlab("Age (days)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (days), younger than 50 months") +
  theme_classic()

ggplot(data = ma_data_no_bare_verb_less_50, mapping = aes(x = mean_age/30.44, y = d_calc, color = sentence_structure)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ylab("Effect Size") +
  xlab("Age (days)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (days), younger than 50 months") +
  theme_classic()

# younger than 36 months

ma_data_no_bare_verb_less_36 <- ma_data_no_bare_verb %>% filter(mean_age < 36*30.44|mean_age  == 36*30.44)

ggplot(data = ma_data_no_bare_verb_less_36, mapping = aes(x = mean_age, y = d_calc, color = sentence_structure)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ylab("Effect Size") +
  xlab("Age (days)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (days), younger than 36 months") +
  theme_classic()

ggplot(data=ma_data_no_bare_verb_less_36, mapping = aes(x = mean_age/30.44, y = d_calc, color = sentence_structure)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ylab("Effect Size") +
  xlab("Age (months)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (months), younger than 36 months") +
  theme_classic()

age and others

ma_data %>% 
ggplot(aes(x = mean_age, y = d_calc, color = population_type)) +
  geom_point() +
  ylab("Effect Size") +
  xlab("Age (days)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (days) break down by population type") +
  theme_classic() -> ae1


ma_data %>% 
ggplot(aes(x = mean_age, y = d_calc, color = stimuli_modality)) +
  geom_point() +
  ylab("Effect Size") +
  xlab("Age (days)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (days) break down by stimuli_modality") +
  theme_classic() -> ae2

ma_data %>% 
ggplot(aes(x = mean_age, y = d_calc, color = stimuli_actor)) +
  geom_point() +
  ylab("Effect Size") +
  xlab("Age (days)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (days) break down by stimuli_actor") +
  theme_classic() -> ae3

ma_data %>% 
ggplot(aes(x = mean_age, y = d_calc, color = character_identification)) +
  geom_point() +
  ylab("Effect Size") +
  xlab("Age (days)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (days) break down by character_identification") +
  theme_classic() -> ae4

ma_data %>% 
ggplot(aes(x = mean_age, y = d_calc, color = practice_phase)) +
  geom_point() +
  ylab("Effect Size") +
  xlab("Age (days)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (days) break down by practice phase") +
  theme_classic() -> ae5

ma_data %>% 
ggplot(aes(x = mean_age, y = d_calc, color = test_mass_or_distributed)) +
  geom_point() +
  ylab("Effect Size") +
  xlab("Age (days)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (days) break down by test type") +
  theme_classic() -> ae6

ma_data %>% 
ggplot(aes(x = mean_age, y = d_calc, color = presentation_type)) +
  geom_point() +
  ylab("Effect Size") +
  xlab("Age (days)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (days) break down by presentation type") +
  theme_classic() -> ae7

ma_data %>% 
ggplot(aes(x = mean_age, y = d_calc, color = test_type)) +
  geom_point() +
  ylab("Effect Size") +
  xlab("Age (days)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (days) break down by test type") +
  theme_classic() -> ae8

grid.arrange(ae1,ae2,ae3,ae4, nrow = 2)

grid.arrange(ae5,ae6,ae7, ae8, nrow =2)

ae1

ae2

ae3

ae4

ae5

ae6

ae7

ae8

Models

m1 <- rma.mv(d_calc, V = d_var_calc,
                            random = ~ 1 | short_cite, data = ma_data)
summary(m1)

## 
## Multivariate Meta-Analysis Model (k = 51; method: REML)
## 
##   logLik  Deviance       AIC       BIC      AICc 
## -88.9977  177.9954  181.9954  185.8194  182.2507   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed      factor 
## sigma^2    0.7417  0.8612     12     no  short_cite 
## 
## Test for Heterogeneity:
## Q(df = 50) = 337.6601, p-val < .0001
## 
## Model Results:
## 
## estimate      se    zval    pval   ci.lb   ci.ub 
##   0.7130  0.2570  2.7740  0.0055  0.2092  1.2167  ** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Age

m2 <- rma.mv(d_calc ~ mean_age , V = d_var_calc,
                            random = ~ 1 | short_cite, data = ma_data)
summary(m2)

## 
## Multivariate Meta-Analysis Model (k = 51; method: REML)
## 
##   logLik  Deviance       AIC       BIC      AICc 
## -85.2584  170.5169  176.5169  182.1923  177.0502   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed      factor 
## sigma^2    0.8360  0.9143     12     no  short_cite 
## 
## Test for Residual Heterogeneity:
## QE(df = 49) = 325.9645, p-val < .0001
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 6.8727, p-val = 0.0088
## 
## Model Results:
## 
##           estimate      se    zval    pval    ci.lb   ci.ub 
## intrcpt     0.4132  0.2955  1.3983  0.1620  -0.1660  0.9923     
## mean_age    0.0004  0.0001  2.6216  0.0088   0.0001  0.0006  ** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Age + sentence structure

m3 <- rma.mv(d_calc ~ mean_age + sentence_structure , V = d_var_calc,
                            random = ~ 1 | short_cite, data = ma_data_no_bare_verb)
summary(m3)

## 
## Multivariate Meta-Analysis Model (k = 51; method: REML)
## 
##   logLik  Deviance       AIC       BIC      AICc 
## -79.0175  158.0350  166.0350  173.5198  166.9652   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed      factor 
## sigma^2    0.8939  0.9454     12     no  short_cite 
## 
## Test for Residual Heterogeneity:
## QE(df = 48) = 325.4158, p-val < .0001
## 
## Test of Moderators (coefficients 2:3):
## QM(df = 2) = 19.5485, p-val < .0001
## 
## Model Results:
## 
##                               estimate      se    zval    pval    ci.lb   ci.ub 
## intrcpt                         0.1248  0.3143  0.3973  0.6912  -0.4911  0.7408 
## mean_age                        0.0004  0.0001  2.5685  0.0102   0.0001  0.0006 
## sentence_structuretransitive    0.4435  0.1249  3.5507  0.0004   0.1987  0.6884 
##  
## intrcpt 
## mean_age                        * 
## sentence_structuretransitive  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Age + sentence structure & interaction??

m4 <- rma.mv(d_calc ~ mean_age + sentence_structure + mean_age*sentence_structure , V = d_var_calc,
                            random = ~ 1 | short_cite, data = ma_data_no_bare_verb)
summary(m4)

## 
## Multivariate Meta-Analysis Model (k = 51; method: REML)
## 
##   logLik  Deviance       AIC       BIC      AICc 
## -78.3885  156.7770  166.7770  176.0278  168.2405   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed      factor 
## sigma^2    0.8770  0.9365     12     no  short_cite 
## 
## Test for Residual Heterogeneity:
## QE(df = 47) = 319.6852, p-val < .0001
## 
## Test of Moderators (coefficients 2:4):
## QM(df = 3) = 20.6925, p-val = 0.0001
## 
## Model Results:
## 
##                                        estimate      se     zval    pval 
## intrcpt                                 -0.4842  0.6430  -0.7531  0.4514 
## mean_age                                 0.0013  0.0008   1.4893  0.1364 
## sentence_structuretransitive             1.0328  0.5581   1.8505  0.0642 
## mean_age:sentence_structuretransitive   -0.0009  0.0008  -1.0838  0.2785 
##                                          ci.lb   ci.ub 
## intrcpt                                -1.7445  0.7760    
## mean_age                               -0.0004  0.0029    
## sentence_structuretransitive           -0.0611  2.1266  . 
## mean_age:sentence_structuretransitive  -0.0025  0.0007    
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#vocab

productive_vocab_median has more entries than productive_vocab_mean, so using productive_vocab_median here

ma_data %>% 
  filter(productive_vocab_median != "NA") %>% 
  mutate(productive_vocab_median = as.numeric(productive_vocab_median))-> ma_data_vocab_median

#vocab ## vocab and age

ma_data_vocab_median %>% 
  ggplot(aes(x = productive_vocab_median, y = mean_age)) +
  geom_point() + 
  geom_smooth(method = "lm") + 
  labs(title = "median productive vocabulary and age by day")

vocab and effect size

ma_data_vocab_median %>% 
  ggplot(aes(x = productive_vocab_median, y = d_calc)) +
  geom_point() + 
  geom_smooth(method = "lm") + 
  labs(title = "median productive vocabulary and sample size")

model:

m4 <- rma.mv(d_calc ~ productive_vocab_median  , V = d_var_calc,
                            random = ~ 1 | short_cite, data = ma_data_vocab_median)
summary(m4)

## 
## Multivariate Meta-Analysis Model (k = 30; method: REML)
## 
##   logLik  Deviance       AIC       BIC      AICc 
## -53.3769  106.7538  112.7538  116.7504  113.7538   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed      factor 
## sigma^2    0.6453  0.8033      6     no  short_cite 
## 
## Test for Residual Heterogeneity:
## QE(df = 28) = 145.2886, p-val < .0001
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 0.0116, p-val = 0.9142
## 
## Model Results:
## 
##                          estimate      se    zval    pval    ci.lb   ci.ub 
## intrcpt                    1.1812  0.4430  2.6666  0.0077   0.3130  2.0494  ** 
## productive_vocab_median    0.0008  0.0070  0.1078  0.9142  -0.0130  0.0145     
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

n_repetition sentence

age and n_repetition

older kiddos have less repetitions

ma_data %>% 
  ggplot(mapping = aes(x = mean_age, y = n_repetitions_sentence)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ylab("number of repetition per novel verb") +
  xlab("mean_age") +
  ggtitle("mean_age vs. number of repetitions for verb (all)") +
  theme_classic() +
  theme(legend.position = "none")

ma_data %>% filter(mean_age <= 50*30.44) %>% 
  ggplot( mapping = aes(x = mean_age, y = n_repetitions_sentence)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ylab("number of repetition per novel verb") +
  xlab("mean_age") +
  ggtitle("mean_age vs. number of repetitions for verb (all), younger than 50 months") +
  theme_classic() +
  theme(legend.position = "none")

ma_data %>% filter(mean_age <= 36*30.44) %>% 
  ggplot( mapping = aes(x = mean_age,y = n_repetitions_sentence)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ylab("number of repetition per novel verb") +
  xlab("mean_age") +
  ggtitle("mean_age vs. number of repetitions for verb (all), younger than 36 months") +
  theme_classic() +
  theme(legend.position = "none")

does not seem to correlate with effect sizes

ma_data %>% 
  ggplot(mapping = aes(x = n_repetitions_sentence, y = d_calc)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ylab("Effect Size") +
  xlab("number of repetition per novel verb") +
  ggtitle("Syntactical Bootstrapping effect size vs. number of repetitions for verb") +
  theme_classic() +
  theme(legend.position = "none")

ma_data %>% 
  ggplot(mapping = aes(x = n_repetitions_sentence, y = d_calc, color = test_mass_or_distributed)) +
  geom_point() +
  ylab("Effect Size") +
  xlab("number of repetition per novel verb") +
  ggtitle("Syntactical Bootstrapping effect size vs. number of repetitions for verb") +
  theme_classic()

ma_data %>% 
  ggplot(mapping = aes(x = n_repetitions_sentence, y = d_calc, color = character_identification)) +
  geom_point() +
  ylab("Effect Size") +
  xlab("number of repetition per novel verb") +
  ggtitle("Syntactical Bootstrapping effect size vs. number of repetitions for verb (break down by character identification)") +
  theme_classic()

ma_data %>% 
  ggplot(mapping = aes(x = n_repetitions_sentence, y = d_calc, color = practice_phase)) +
  geom_point() +
  ylab("Effect Size") +
  xlab("number of repetition per novel verb") +
  ggtitle("Syntactical Bootstrapping effect size vs. number of repetitions for verb (break down by practice phase)") +
  theme_classic()

model

m5 <- rma.mv(d_calc ~ n_repetitions_sentence , V = d_var_calc,
                            random = ~ 1 | short_cite, data = ma_data)
summary(m5)

## 
## Multivariate Meta-Analysis Model (k = 49; method: REML)
## 
##   logLik  Deviance       AIC       BIC      AICc 
## -87.2647  174.5294  180.5294  186.0798  181.0875   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed      factor 
## sigma^2    0.8013  0.8952     11     no  short_cite 
## 
## Test for Residual Heterogeneity:
## QE(df = 47) = 321.6050, p-val < .0001
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 0.4687, p-val = 0.4936
## 
## Model Results:
## 
##                         estimate      se    zval    pval    ci.lb   ci.ub 
## intrcpt                   0.6443  0.3181  2.0255  0.0428   0.0208  1.2677  * 
## n_repetitions_sentence    0.0126  0.0185  0.6846  0.4936  -0.0235  0.0488    
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

m6 <- rma.mv(d_calc ~ n_repetitions_sentence + mean_age, V = d_var_calc,
                            random = ~ 1 | short_cite, data = ma_data)
summary(m6)

## 
## Multivariate Meta-Analysis Model (k = 49; method: REML)
## 
##   logLik  Deviance       AIC       BIC      AICc 
## -83.4956  166.9912  174.9912  182.3058  175.9668   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed      factor 
## sigma^2    0.8915  0.9442     11     no  short_cite 
## 
## Test for Residual Heterogeneity:
## QE(df = 46) = 319.7277, p-val < .0001
## 
## Test of Moderators (coefficients 2:3):
## QM(df = 2) = 7.7215, p-val = 0.0211
## 
## Model Results:
## 
##                         estimate      se    zval    pval    ci.lb   ci.ub 
## intrcpt                   0.3017  0.3553  0.8492  0.3958  -0.3947  0.9981     
## n_repetitions_sentence    0.0165  0.0186  0.8867  0.3752  -0.0200  0.0529     
## mean_age                  0.0004  0.0001  2.6969  0.0070   0.0001  0.0006  ** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

m7 <- rma.mv(d_calc ~ n_repetitions_sentence + mean_age + n_repetitions_sentence * mean_age, V = d_var_calc,
                            random = ~ 1 | short_cite, data = ma_data)
summary(m7)

## 
## Multivariate Meta-Analysis Model (k = 49; method: REML)
## 
##   logLik  Deviance       AIC       BIC      AICc 
## -82.6257  165.2514  175.2514  184.2847  176.7898   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed      factor 
## sigma^2    0.9310  0.9649     11     no  short_cite 
## 
## Test for Residual Heterogeneity:
## QE(df = 45) = 316.4786, p-val < .0001
## 
## Test of Moderators (coefficients 2:4):
## QM(df = 3) = 7.9389, p-val = 0.0473
## 
## Model Results:
## 
##                                  estimate      se     zval    pval    ci.lb 
## intrcpt                            0.2483  0.3817   0.6505  0.5154  -0.4999 
## n_repetitions_sentence             0.0477  0.0746   0.6398  0.5223  -0.0985 
## mean_age                           0.0006  0.0004   1.2693  0.2043  -0.0003 
## n_repetitions_sentence:mean_age   -0.0001  0.0001  -0.4340  0.6643  -0.0003 
##                                   ci.ub 
## intrcpt                          0.9965    
## n_repetitions_sentence           0.1939    
## mean_age                         0.0014    
## n_repetitions_sentence:mean_age  0.0002    
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

argument types

ma_data

cis_by_at <- ma_data %>%
    group_by(agent_argument_type) %>%
    summarize(mean = mean(d_calc),
            sd = sd(d_calc),
            n = n()) %>%
    mutate(ci_range_95 =  1.96 * (sd/sqrt(n)),
         ci_lower = mean - ci_range_95,
         ci_upper = mean + ci_range_95)

ma_data %>% filter(agent_argument_type == "pronoun" | agent_argument_type == "noun") -> ma_data_by_at
cis_by_at %>% filter(agent_argument_type == "pronoun" | agent_argument_type == "noun") -> cis_by_at

ggplot(data=ma_data_by_at, aes(x = agent_argument_type, 
                    y = d_calc, 
                    color = agent_argument_type)) +
  geom_violin() +
  geom_point(alpha = .4)  +
  ylab("Effect Size") +
  xlab("Agent Argument Type") +
  ggtitle("SB effect size by agent argument type") +
  geom_errorbar(data = cis_by_at, 
                  aes(x = agent_argument_type,
                      y = mean, ymin = ci_lower, 
                      ymax = ci_upper), 
                  color = "black") +
  geom_pointrange(data = cis_by_at, 
                  aes(x = agent_argument_type,
                      y = mean, ymin = ci_lower, 
                      ymax = ci_upper), 
                  color = "black") +
  geom_hline(aes(yintercept = 0), linetype = 2) +
  theme_classic(base_size = 16) +
  theme(legend.position = "none")

ggplot(data=ma_data_by_at, aes(x = agent_argument_type, 
                    y = d_calc, 
                    color = agent_argument_type)) +
  geom_violin() +
  geom_point(alpha = .4)  +
  ylab("Effect Size") +
  xlab("Agent Argument Type") +
  ggtitle("SB effect size by agent argument type") +
  geom_errorbar(data = cis_by_at, 
                  aes(x = agent_argument_type,
                      y = mean, ymin = ci_lower, 
                      ymax = ci_upper), 
                  color = "black") +
  geom_pointrange(data = cis_by_at, 
                  aes(x = agent_argument_type,
                      y = mean, ymin = ci_lower, 
                      ymax = ci_upper), 
                  color = "black") +
  geom_hline(aes(yintercept = 0), linetype = 2) +
  facet_wrap(~sentence_structure) + 
  theme_classic(base_size = 16) +
  theme(legend.position = "none")

ma_data_by_at %>% 
  filter(sentence_structure == "transitive") %>% 
  filter(patient_argument_type == "pronoun" | patient_argument_type == "noun") -> ma_data_by_pat
  
cis_by_pat <- ma_data_by_pat %>%
    group_by(patient_argument_type) %>%
    summarize(mean = mean(d_calc),
            sd = sd(d_calc),
            n = n()) %>%
    mutate(ci_range_95 =  1.96 * (sd/sqrt(n)),
         ci_lower = mean - ci_range_95,
         ci_upper = mean + ci_range_95)

ggplot(data=ma_data_by_pat, aes(x = patient_argument_type, 
                    y = d_calc, 
                    color = patient_argument_type)) +
  geom_violin() +
  geom_point(alpha = .4)  +
  ylab("Effect Size") +
  xlab("Patient Argument Type") +
  ggtitle("SB effect size by patient argument type") +
  geom_errorbar(data = cis_by_pat, 
                  aes(x = patient_argument_type,
                      y = mean, ymin = ci_lower, 
                      ymax = ci_upper), 
                  color = "black") +
  geom_pointrange(data = cis_by_pat, 
                  aes(x = patient_argument_type,
                      y = mean, ymin = ci_lower, 
                      ymax = ci_upper), 
                  color = "black") +
  facet_wrap(~agent_argument_type) +
  geom_hline(aes(yintercept = 0), linetype = 2) +
  theme_classic(base_size = 16) +
  theme(legend.position = "none")

summary

tree plot

ma_model <- rma(ma_data$d_calc, ma_data$d_var_calc)
ma_model

## 
## Random-Effects Model (k = 51; tau^2 estimator: REML)
## 
## tau^2 (estimated amount of total heterogeneity): 1.1476 (SE = 0.2602)
## tau (square root of estimated tau^2 value):      1.0712
## I^2 (total heterogeneity / total variability):   91.77%
## H^2 (total variability / sampling variability):  12.16
## 
## Test for Heterogeneity:
## Q(df = 50) = 337.6601, p-val < .0001
## 
## Model Results:
## 
## estimate      se    zval    pval   ci.lb   ci.ub 
##   0.6290  0.1602  3.9273  <.0001  0.3151  0.9429  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

forest(ma_model,
       header = T,
       slab = ma_data$unique_id,
       col = "red",
       cex = .7)

Funnel plot

ma_data %>% 
  mutate(color = ifelse(sentence_structure == "transitive", "red", "blue"),
         color_sample_size = ifelse(n_1 < 10, "red", ifelse(n_1 < 20, "orange", "yellow")),
         color_confidence = ifelse(inclusion_certainty == 1, "red", "black"))-> ma_data_funnel

ma_data_funnel %>% filter (abs(d_calc) < 5) -> ma_data_funnel_no_outlier
ss_colors <- ma_data_funnel$color
ss_colors_no_outlier <- ma_data_funnel_no_outlier$color 
size_colors <- ma_data_funnel$color_sample_size
size_colors_no_outlier <- ma_data_funnel_no_outlier$color_sample_size
c_colors <- ma_data_funnel$color_confidence
c_colors_no_outlier <- ma_data_funnel_no_outlier$color_confidence

ma_model_funnel <- rma(ma_data_funnel$d_calc, ma_data_funnel$d_var_calc)
ma_model_funnel_no_outlier <- rma(ma_data_funnel_no_outlier$d_calc, ma_data_funnel_no_outlier$d_var_calc)

f1<- funnel(ma_model_funnel, xlab = "Effect Size", col = ss_colors) 
legend("topright",bg = "white",legend = c("transitive","intransitive"),pch=16,col=c("red", "blue"))
title(main = "All effect sizes break down by sentence structure")

f2<- funnel(ma_model_funnel_no_outlier, xlab = "Effect Size", col = ss_colors_no_outlier) 
legend("topright",bg = "white",legend = c("transitive","intransitive"),pch=16,col=c("red", "blue"))
title(main = "effect sizes excluded outliers (abs <5) break down by sentence structure")

f3<- funnel(ma_model_funnel, xlab = "Effect Size", col = size_colors) 
legend("topright",bg = "white",legend = c("small(<10)","medium(10-20)", "large(>20)"),pch=16,col=c("red", "orange", "yellow"))
title(main = "all effect sizes break down by sample size")

f4<- funnel(ma_model_funnel_no_outlier, xlab = "Effect Size", col = size_colors_no_outlier) 
legend("topright",bg = "white",legend = c("small(<10)","medium(10-20)", "large(>20)"),pch=16,col=c("red", "orange", "yellow"))
title(main = "effect sizes excluded outliers (abs <5) break down by sample size")

f5<- funnel(ma_model_funnel, xlab = "Effect Size", col = c_colors) 
legend("topright",bg = "white",legend = c("weird ones","normal ones"),pch=16,col=c("red", "black"))
title(main = "all effect sizes break down by confidence")

f6<- funnel(ma_model_funnel_no_outlier, xlab = "Effect Size", col = c_colors_no_outlier) 
legend("topright",bg = "white",legend = c("weird ones","normal ones"),pch=16,col=c("red", "black"))
title(main = "effect sizes excluded outliers (abs <5) break down by confidence")

sb DRAFT-high confidence