library(tidyverse)
library(googlesheets4)
library(metafor)
library(knitr)
library(gridExtra)
source("calculate_es_bootstrapping.R")
source("tidy_data_bootstrapping.R")
GOOGLE_SHEET_ID <- "1kSL5lpmHcaw9lOp2dhJ_RH15REFe7oZVwMO1vJc930o"

read_raw_data_and_clean("1kSL5lpmHcaw9lOp2dhJ_RH15REFe7oZVwMO1vJc930o")
write_tidy_sheet("1kSL5lpmHcaw9lOp2dhJ_RH15REFe7oZVwMO1vJc930o")
ma_data <- read_sheet(MA_DATA_GOOGLE_SHEET_ID, "MA data tidy with ES NEW")


ma_data <- ma_data %>% filter(!is.na(d_calc)) %>% mutate(practice_phase = ifelse(practice_phase == "NA","no",practice_phase),
                                                         character_identification = ifelse(character_identification == "NA", "no", character_identification),
                                                         presentation_type = ifelse(presentation_type == "immediate-after", "immediate_after", ifelse(presentation_type == "Immediate-after", "immediate_after", presentation_type)),
                                                         patient_argument_type = ifelse(patient_argument_type == "N/A", "NA", patient_argument_type),
                                                         population_type = ifelse(population_type == "typical_developing", "typically_developing", population_type),
                                                         agent_argument_type = ifelse(agent_argument_type == "pronoun_and_noun" | agent_argument_type == "pronoung_and_noun", "noun_and_pronoun", agent_argument_type),
                                                         test_type = ifelse(test_type == "actor", "agent", test_type))

Data Overview

number of differnet effectsizes
number of conditions included

N_effect_sizes <- length((!is.na(ma_data$d_calc))&!(ma_data$d_calc ==0))
N_papers <- length(unique(ma_data$unique_id))

There are 96 effect sizes collected from 19 different papers.

Variable Summary

distribution of sample size

summary(ma_data$n_1)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    6.00    8.00   12.00   13.39   16.00   32.00

ma_data %>% 
  ggplot(aes(x = n_1)) + 
  geom_histogram() + 
  geom_vline(xintercept =mean(ma_data$n_1)) +
  stat_bin(binwidth = 2) + 
  labs(title = "Distribution of sample size")

distribtuion of x_1

summary(ma_data$x_1)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0450  0.4505  0.5529  0.5256  0.6200  0.8800

ma_data %>% 
  ggplot(aes(x = x_1)) + 
  geom_histogram() + 
  stat_bin(binwidth = 0.05) + 
  geom_vline(xintercept =mean(ma_data$x_1))+
  labs(title = "Distribution of x_1")

- distribtuion of sd_1

summary(ma_data$SD_1)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.01944 0.06000 0.13500 0.15703 0.20000 0.49000

ma_data %>% 
  ggplot(aes(x =SD_1 )) + 
  geom_histogram() + 
  stat_bin(binwidth = 0.01) + 
  geom_vline(xintercept =mean(ma_data$SD_1))+
  labs(title = "Distribution of sd_1")

distribution of d_calc

summary(ma_data$d_calc)

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -10.1111  -0.2390   0.3934   0.3271   1.2139   4.1184

ma_data %>% 
  ggplot(aes(x =d_calc )) + 
  geom_histogram() + 
  geom_vline(xintercept = mean(ma_data$d_calc))+
  labs(title = "Distribution of d_calc")

summary for all categorical variables

ma_data %>% group_by(sentence_structure) %>% 
  summarize(count = n()) %>% ggplot(aes(x=sentence_structure, y=count)) +     
  geom_col(position = 'dodge',width=0.4) + 
  theme(text = element_text(size=8)) ->p1

ma_data %>% group_by(language) %>% 
  summarize(count = n()) %>% ggplot(aes(x=language, y=count)) +     
  geom_col(position = 'dodge',width=0.4) + 
  theme(text = element_text(size=8)) ->p2

ma_data %>% group_by(population_type) %>% 
  summarize(count = n()) %>% ggplot(aes(x=population_type, y=count)) +     
  geom_col(position = 'dodge',width=0.4) + 
  theme(text = element_text(size=8))->p3

ma_data %>% group_by(agent_argument_type) %>% 
  summarize(count = n()) %>% ggplot(aes(x=agent_argument_type, y=count)) +     
  geom_col(position = 'dodge',width=0.4) + 
  theme(text = element_text(size=8))->p4

ma_data %>% group_by(patient_argument_type) %>% 
  summarize(count = n()) %>% ggplot(aes(x=patient_argument_type , y=count)) +     
  geom_col(position = 'dodge',width=0.4) + 
  theme(text = element_text(size=8))->p5

ma_data %>% group_by(stimuli_type) %>% 
  summarize(count = n()) %>% ggplot(aes(x=stimuli_type  , y=count)) +     
  geom_col(position = 'dodge',width=0.4) + 
  theme(text = element_text(size=8))->p6

ma_data %>% group_by(stimuli_modality) %>% 
  summarize(count = n()) %>% ggplot(aes(x=stimuli_modality, y=count)) +     
  geom_col(position = 'dodge',width=0.4) + 
  theme(text = element_text(size=8))->p7

ma_data %>% group_by(stimuli_actor) %>% 
  summarize(count = n()) %>% ggplot(aes(x=stimuli_actor, y=count)) +     
  geom_col(position = 'dodge',width=0.4) + 
  theme(text = element_text(size=8))->p8

ma_data %>% group_by(presentation_type) %>% 
  summarize(count = n()) %>% ggplot(aes(x=presentation_type, y=count)) +     
  geom_col(position = 'dodge',width=0.4) + 
  theme(text = element_text(size=8))->p9

ma_data %>% group_by(character_identification) %>% 
  summarize(count = n()) %>% ggplot(aes(x=character_identification, y=count)) +     
  geom_col(position = 'dodge',width=0.4) + 
  theme(text = element_text(size=8))->p10

ma_data %>% group_by(practice_phase) %>% 
  summarize(count = n()) %>% ggplot(aes(x=practice_phase, y=count)) +     
  geom_col(position = 'dodge',width=0.4) + 
  theme(text = element_text(size=8))->p11

ma_data %>% group_by(test_mass_or_distributed) %>% 
  summarize(count = n()) %>% ggplot(aes(x=test_mass_or_distributed, y=count)) +     
  geom_col(position = 'dodge',width=0.4) + 
  theme(text = element_text(size=8))->p12

margin = theme(plot.margin = unit(c(2,2,2,2), "cm"))
grid.arrange(p1, p2, p3, p4,p5,p6,p7,p8,p9,p10,p11,p12, nrow = 3)

age

age only

# all ages (days)

ma_data %>% 
ggplot(aes(x = mean_age, y = d_calc, size = n_1)) +
  geom_point() +
  ylab("Effect Size") +
  xlab("Age (days)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (days)") +
  theme_classic() +
  theme(legend.position = "none")

# all ages (months)

ggplot(ma_data, aes(x = (mean_age/30.44), 
                                 y = d_calc, 
                                 size = n_1)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ylab("Effect Size") +
  xlab("Age (months)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (months)") +
  theme_classic() +
  theme(legend.position = "none")

# Younger than 50 months 

ma_data %>% filter(mean_age <= 50*30.44) %>% 
  ggplot( mapping = aes(x = mean_age, y = d_calc, size = n_1)) +
  geom_point() +
  ylab("Effect Size") +
  xlab("Age (days)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (days), younger than 50 months") +
  theme_classic() +
  theme(legend.position = "none")

ma_data %>% filter(mean_age <= 50*30.44) %>% 
  ggplot(mapping = aes(x = mean_age/30.44, y = d_calc, size = n_1)) +
  geom_point() +
  ylab("Effect Size") +
  xlab("Age (months)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (months), younger than 50 months") +
  theme_classic() +
  theme(legend.position = "none")

# younger than 36 months
  
ma_data %>% filter(mean_age <= 36*30.44) %>% 
  ggplot(mapping = aes(x = mean_age, y = d_calc, size = n_1)) +
  geom_point() +
  ylab("Effect Size") +
  xlab("Age (days)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (days), younger than 36 months") +
  theme_classic() +
  theme(legend.position = "none")

ma_data %>% filter(mean_age <= 36*30.44) %>% 
  ggplot(mapping = aes(x = mean_age/30.44, y = d_calc, size = n_1)) +
  geom_point() +
  ylab("Effect Size") +
  xlab("Age (months)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (months), younger than 36 months") +
  theme_classic() +
  theme(legend.position = "none")

## age and sentence structure

ma_data_no_bare_verb <- ma_data %>% filter(sentence_structure != "bare_verb")

# all ages (days)
ggplot(data = ma_data_no_bare_verb, aes(x = mean_age, y = d_calc, color = sentence_structure)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ylab("Effect Size") +
  xlab("Age (days)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (days)") +
  theme_classic()

# all ages (months)

ggplot(data = ma_data_no_bare_verb, aes(x = (mean_age/30.44), y = d_calc, color = sentence_structure)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ylab("Effect Size") +
  xlab("Age (days)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (days)") +
  theme_classic()

# Younger than 50 months 
ma_data_no_bare_verb_less_50 <- ma_data_no_bare_verb %>% filter(mean_age < 50*30.44 |mean_age==50*30.44)

ggplot(data = ma_data_no_bare_verb_less_50, mapping = aes(x = mean_age, y = d_calc, color = sentence_structure)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ylab("Effect Size") +
  xlab("Age (days)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (days), younger than 50 months") +
  theme_classic()

ggplot(data = ma_data_no_bare_verb_less_50, mapping = aes(x = mean_age/30.44, y = d_calc, color = sentence_structure)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ylab("Effect Size") +
  xlab("Age (days)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (days), younger than 50 months") +
  theme_classic()

# younger than 36 months

ma_data_no_bare_verb_less_36 <- ma_data_no_bare_verb %>% filter(mean_age < 36*30.44|mean_age  == 36*30.44)

ggplot(data = ma_data_no_bare_verb_less_36, mapping = aes(x = mean_age, y = d_calc, color = sentence_structure)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ylab("Effect Size") +
  xlab("Age (days)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (days), younger than 36 months") +
  theme_classic()

ggplot(data=ma_data_no_bare_verb_less_36, mapping = aes(x = mean_age/30.44, y = d_calc, color = sentence_structure)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ylab("Effect Size") +
  xlab("Age (months)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (months), younger than 36 months") +
  theme_classic()

age and others

ma_data %>% 
ggplot(aes(x = mean_age, y = d_calc, color = population_type)) +
  geom_point() +
  ylab("Effect Size") +
  xlab("Age (days)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (days) break down by population type") +
  theme_classic() -> ae1


ma_data %>% 
ggplot(aes(x = mean_age, y = d_calc, color = stimuli_modality)) +
  geom_point() +
  ylab("Effect Size") +
  xlab("Age (days)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (days) break down by stimuli_modality") +
  theme_classic() -> ae2

ma_data %>% 
ggplot(aes(x = mean_age, y = d_calc, color = stimuli_actor)) +
  geom_point() +
  ylab("Effect Size") +
  xlab("Age (days)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (days) break down by stimuli_actor") +
  theme_classic() -> ae3

ma_data %>% 
ggplot(aes(x = mean_age, y = d_calc, color = character_identification)) +
  geom_point() +
  ylab("Effect Size") +
  xlab("Age (days)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (days) break down by character_identification") +
  theme_classic() -> ae4

ma_data %>% 
ggplot(aes(x = mean_age, y = d_calc, color = practice_phase)) +
  geom_point() +
  ylab("Effect Size") +
  xlab("Age (days)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (days) break down by practice phase") +
  theme_classic() -> ae5

ma_data %>% 
ggplot(aes(x = mean_age, y = d_calc, color = test_mass_or_distributed)) +
  geom_point() +
  ylab("Effect Size") +
  xlab("Age (days)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (days) break down by test type") +
  theme_classic() -> ae6

ma_data %>% 
ggplot(aes(x = mean_age, y = d_calc, color = presentation_type)) +
  geom_point() +
  ylab("Effect Size") +
  xlab("Age (days)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (days) break down by presentation type") +
  theme_classic() -> ae7

ma_data %>% 
ggplot(aes(x = mean_age, y = d_calc, color = test_type)) +
  geom_point() +
  ylab("Effect Size") +
  xlab("Age (days)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (days) break down by test type") +
  theme_classic() -> ae8

grid.arrange(ae1,ae2,ae3,ae4, nrow = 2)

grid.arrange(ae5,ae6,ae7, ae8, nrow =2)

ae1

ae2

ae3

ae4

ae5

ae6

ae7

ae8

Models

m1 <- rma.mv(d_calc, V = d_var_calc,
                            random = ~ 1 | short_cite, data = ma_data)
summary(m1)

## 
## Multivariate Meta-Analysis Model (k = 96; method: REML)
## 
##    logLik   Deviance        AIC        BIC       AICc 
## -250.1344   500.2688   504.2688   509.3766   504.3993   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed      factor 
## sigma^2    0.5419  0.7361     19     no  short_cite 
## 
## Test for Heterogeneity:
## Q(df = 95) = 700.5932, p-val < .0001
## 
## Model Results:
## 
## estimate      se    zval    pval   ci.lb   ci.ub 
##   0.4976  0.1745  2.8521  0.0043  0.1557  0.8396  ** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Age

m2 <- rma.mv(d_calc ~ mean_age , V = d_var_calc,
                            random = ~ 1 | short_cite, data = ma_data)
summary(m2)

## 
## Multivariate Meta-Analysis Model (k = 95; method: REML)
## 
##    logLik   Deviance        AIC        BIC       AICc 
## -236.3092   472.6185   478.6185   486.2163   478.8881   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed      factor 
## sigma^2    0.5582  0.7471     19     no  short_cite 
## 
## Test for Residual Heterogeneity:
## QE(df = 93) = 674.3523, p-val < .0001
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 8.9578, p-val = 0.0028
## 
## Model Results:
## 
##           estimate      se    zval    pval    ci.lb   ci.ub 
## intrcpt     0.2027  0.2048  0.9900  0.3222  -0.1987  0.6041     
## mean_age    0.0003  0.0001  2.9930  0.0028   0.0001  0.0006  ** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Age + sentence structure

m3 <- rma.mv(d_calc ~ mean_age + sentence_structure , V = d_var_calc,
                            random = ~ 1 | short_cite, data = ma_data_no_bare_verb)
summary(m3)

## 
## Multivariate Meta-Analysis Model (k = 92; method: REML)
## 
##    logLik   Deviance        AIC        BIC       AICc 
## -224.2725   448.5450   456.5450   466.4996   457.0212   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed      factor 
## sigma^2    0.6297  0.7935     19     no  short_cite 
## 
## Test for Residual Heterogeneity:
## QE(df = 89) = 664.0076, p-val < .0001
## 
## Test of Moderators (coefficients 2:3):
## QM(df = 2) = 28.7357, p-val < .0001
## 
## Model Results:
## 
##                               estimate      se     zval    pval    ci.lb 
## intrcpt                        -0.0523  0.2235  -0.2339  0.8151  -0.4903 
## mean_age                        0.0003  0.0001   2.6203  0.0088   0.0001 
## sentence_structuretransitive    0.4320  0.0946   4.5677  <.0001   0.2466 
##                                ci.ub 
## intrcpt                       0.3858      
## mean_age                      0.0005   ** 
## sentence_structuretransitive  0.6174  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Age + sentence structure & interaction??

m4 <- rma.mv(d_calc ~ mean_age + sentence_structure + mean_age*sentence_structure , V = d_var_calc,
                            random = ~ 1 | short_cite, data = ma_data_no_bare_verb)
summary(m4)

## 
## Multivariate Meta-Analysis Model (k = 92; method: REML)
## 
##    logLik   Deviance        AIC        BIC       AICc 
## -223.9740   447.9480   457.9480   470.3346   458.6797   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed      factor 
## sigma^2    0.6351  0.7969     19     no  short_cite 
## 
## Test for Residual Heterogeneity:
## QE(df = 88) = 663.8345, p-val < .0001
## 
## Test of Moderators (coefficients 2:4):
## QM(df = 3) = 29.2873, p-val < .0001
## 
## Model Results:
## 
##                                        estimate      se     zval    pval 
## intrcpt                                  0.1820  0.3921   0.4641  0.6426 
## mean_age                                -0.0000  0.0005  -0.0283  0.9775 
## sentence_structuretransitive             0.1829  0.3548   0.5155  0.6062 
## mean_age:sentence_structuretransitive    0.0003  0.0005   0.7289  0.4661 
##                                          ci.lb   ci.ub 
## intrcpt                                -0.5865  0.9504    
## mean_age                               -0.0009  0.0009    
## sentence_structuretransitive           -0.5126  0.8784    
## mean_age:sentence_structuretransitive  -0.0006  0.0013    
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#vocab

productive_vocab_median has more entries than productive_vocab_mean, so using productive_vocab_median here

ma_data %>% 
  filter(productive_vocab_median != "NA") %>% 
  mutate(productive_vocab_median = as.numeric(productive_vocab_median))-> ma_data_vocab_median

#vocab ## vocab and age

ma_data_vocab_median %>% 
  ggplot(aes(x = productive_vocab_median, y = mean_age)) +
  geom_point() + 
  geom_smooth(method = "lm") + 
  labs(title = "median productive vocabulary and age by day")

vocab and effect size

ma_data_vocab_median %>% 
  ggplot(aes(x = productive_vocab_median, y = d_calc)) +
  geom_point() + 
  geom_smooth(method = "lm") + 
  labs(title = "median productive vocabulary and sample size")

model:

m4 <- rma.mv(d_calc ~ productive_vocab_median  , V = d_var_calc,
                            random = ~ 1 | short_cite, data = ma_data_vocab_median)
summary(m4)

## 
## Multivariate Meta-Analysis Model (k = 42; method: REML)
## 
##   logLik  Deviance       AIC       BIC      AICc 
## -92.3282  184.6564  190.6564  195.7230  191.3230   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed      factor 
## sigma^2    0.8064  0.8980      7     no  short_cite 
## 
## Test for Residual Heterogeneity:
## QE(df = 40) = 258.2910, p-val < .0001
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 2.9983, p-val = 0.0834
## 
## Model Results:
## 
##                          estimate      se     zval    pval    ci.lb   ci.ub 
## intrcpt                    0.8960  0.3614   2.4789  0.0132   0.1876  1.6044  * 
## productive_vocab_median   -0.0033  0.0019  -1.7316  0.0834  -0.0071  0.0004  . 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

n_repetition sentence

age and n_repetition

older kiddos have less repetitions

ma_data %>% 
  ggplot(mapping = aes(x = mean_age, y = n_repetitions_sentence)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ylab("number of repetition per novel verb") +
  xlab("mean_age") +
  ggtitle("mean_age vs. number of repetitions for verb (all)") +
  theme_classic() +
  theme(legend.position = "none")

ma_data %>% filter(mean_age <= 50*30.44) %>% 
  ggplot( mapping = aes(x = mean_age, y = n_repetitions_sentence)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ylab("number of repetition per novel verb") +
  xlab("mean_age") +
  ggtitle("mean_age vs. number of repetitions for verb (all), younger than 50 months") +
  theme_classic() +
  theme(legend.position = "none")

ma_data %>% filter(mean_age <= 36*30.44) %>% 
  ggplot( mapping = aes(x = mean_age,y = n_repetitions_sentence)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ylab("number of repetition per novel verb") +
  xlab("mean_age") +
  ggtitle("mean_age vs. number of repetitions for verb (all), younger than 36 months") +
  theme_classic() +
  theme(legend.position = "none")

does not seem to correlate with effect sizes

ma_data %>% 
  ggplot(mapping = aes(x = n_repetitions_sentence, y = d_calc)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ylab("Effect Size") +
  xlab("number of repetition per novel verb") +
  ggtitle("Syntactical Bootstrapping effect size vs. number of repetitions for verb") +
  theme_classic() +
  theme(legend.position = "none")

ma_data %>% 
  ggplot(mapping = aes(x = n_repetitions_sentence, y = d_calc, color = test_mass_or_distributed)) +
  geom_point() +
  ylab("Effect Size") +
  xlab("number of repetition per novel verb") +
  ggtitle("Syntactical Bootstrapping effect size vs. number of repetitions for verb") +
  theme_classic()

ma_data %>% 
  ggplot(mapping = aes(x = n_repetitions_sentence, y = d_calc, color = character_identification)) +
  geom_point() +
  ylab("Effect Size") +
  xlab("number of repetition per novel verb") +
  ggtitle("Syntactical Bootstrapping effect size vs. number of repetitions for verb (break down by character identification)") +
  theme_classic()

ma_data %>% 
  ggplot(mapping = aes(x = n_repetitions_sentence, y = d_calc, color = practice_phase)) +
  geom_point() +
  ylab("Effect Size") +
  xlab("number of repetition per novel verb") +
  ggtitle("Syntactical Bootstrapping effect size vs. number of repetitions for verb (break down by practice phase)") +
  theme_classic()

model

m5 <- rma.mv(d_calc ~ n_repetitions_sentence , V = d_var_calc,
                            random = ~ 1 | short_cite, data = ma_data)
summary(m5)

## 
## Multivariate Meta-Analysis Model (k = 90; method: REML)
## 
##    logLik   Deviance        AIC        BIC       AICc 
## -221.9184   443.8368   449.8368   457.2689   450.1226   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed      factor 
## sigma^2    0.6293  0.7933     17     no  short_cite 
## 
## Test for Residual Heterogeneity:
## QE(df = 88) = 643.5396, p-val < .0001
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 0.9216, p-val = 0.3371
## 
## Model Results:
## 
##                         estimate      se    zval    pval    ci.lb   ci.ub 
## intrcpt                   0.4135  0.2352  1.7584  0.0787  -0.0474  0.8744  . 
## n_repetitions_sentence    0.0150  0.0156  0.9600  0.3371  -0.0156  0.0456    
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

m6 <- rma.mv(d_calc ~ n_repetitions_sentence + mean_age, V = d_var_calc,
                            random = ~ 1 | short_cite, data = ma_data)
summary(m6)

## 
## Multivariate Meta-Analysis Model (k = 89; method: REML)
## 
##    logLik   Deviance        AIC        BIC       AICc 
## -207.5827   415.1654   423.1654   432.9828   423.6592   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed      factor 
## sigma^2    0.6584  0.8114     17     no  short_cite 
## 
## Test for Residual Heterogeneity:
## QE(df = 86) = 619.5758, p-val < .0001
## 
## Test of Moderators (coefficients 2:3):
## QM(df = 2) = 11.3523, p-val = 0.0034
## 
## Model Results:
## 
##                         estimate      se    zval    pval    ci.lb   ci.ub 
## intrcpt                   0.0052  0.2724  0.0191  0.9848  -0.5286  0.5390     
## n_repetitions_sentence    0.0246  0.0159  1.5452  0.1223  -0.0066  0.0558     
## mean_age                  0.0004  0.0001  3.2248  0.0013   0.0001  0.0006  ** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

m7 <- rma.mv(d_calc ~ n_repetitions_sentence + mean_age + n_repetitions_sentence * mean_age, V = d_var_calc,
                            random = ~ 1 | short_cite, data = ma_data)
summary(m7)

## 
## Multivariate Meta-Analysis Model (k = 89; method: REML)
## 
##    logLik   Deviance        AIC        BIC       AICc 
## -204.9351   409.8702   419.8702   432.0834   420.6296   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed      factor 
## sigma^2    0.6274  0.7921     17     no  short_cite 
## 
## Test for Residual Heterogeneity:
## QE(df = 85) = 617.6352, p-val < .0001
## 
## Test of Moderators (coefficients 2:4):
## QM(df = 3) = 16.3494, p-val = 0.0010
## 
## Model Results:
## 
##                                  estimate      se     zval    pval    ci.lb 
## intrcpt                           -0.0246  0.2692  -0.0914  0.9272  -0.5523 
## n_repetitions_sentence             0.0646  0.0239   2.7022  0.0069   0.0178 
## mean_age                           0.0007  0.0002   3.7332  0.0002   0.0003 
## n_repetitions_sentence:mean_age   -0.0001  0.0000  -2.2508  0.0244  -0.0002 
##                                    ci.ub 
## intrcpt                           0.5031      
## n_repetitions_sentence            0.1115   ** 
## mean_age                          0.0011  *** 
## n_repetitions_sentence:mean_age  -0.0000    * 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

argument types

ma_data

cis_by_at <- ma_data %>%
    group_by(agent_argument_type) %>%
    summarize(mean = mean(d_calc),
            sd = sd(d_calc),
            n = n()) %>%
    mutate(ci_range_95 =  1.96 * (sd/sqrt(n)),
         ci_lower = mean - ci_range_95,
         ci_upper = mean + ci_range_95)

ma_data %>% filter(agent_argument_type == "pronoun" | agent_argument_type == "noun") -> ma_data_by_at
cis_by_at %>% filter(agent_argument_type == "pronoun" | agent_argument_type == "noun") -> cis_by_at

ggplot(data=ma_data_by_at, aes(x = agent_argument_type, 
                    y = d_calc, 
                    color = agent_argument_type)) +
  geom_violin() +
  geom_point(alpha = .4)  +
  ylab("Effect Size") +
  xlab("Agent Argument Type") +
  ggtitle("SB effect size by agent argument type") +
  geom_errorbar(data = cis_by_at, 
                  aes(x = agent_argument_type,
                      y = mean, ymin = ci_lower, 
                      ymax = ci_upper), 
                  color = "black") +
  geom_pointrange(data = cis_by_at, 
                  aes(x = agent_argument_type,
                      y = mean, ymin = ci_lower, 
                      ymax = ci_upper), 
                  color = "black") +
  geom_hline(aes(yintercept = 0), linetype = 2) +
  theme_classic(base_size = 16) +
  theme(legend.position = "none")

ggplot(data=ma_data_by_at, aes(x = agent_argument_type, 
                    y = d_calc, 
                    color = agent_argument_type)) +
  geom_violin() +
  geom_point(alpha = .4)  +
  ylab("Effect Size") +
  xlab("Agent Argument Type") +
  ggtitle("SB effect size by agent argument type") +
  geom_errorbar(data = cis_by_at, 
                  aes(x = agent_argument_type,
                      y = mean, ymin = ci_lower, 
                      ymax = ci_upper), 
                  color = "black") +
  geom_pointrange(data = cis_by_at, 
                  aes(x = agent_argument_type,
                      y = mean, ymin = ci_lower, 
                      ymax = ci_upper), 
                  color = "black") +
  geom_hline(aes(yintercept = 0), linetype = 2) +
  facet_wrap(~sentence_structure) + 
  theme_classic(base_size = 16) +
  theme(legend.position = "none")

ma_data_by_at %>% 
  filter(sentence_structure == "transitive") %>% 
  filter(patient_argument_type == "pronoun" | patient_argument_type == "noun") -> ma_data_by_pat
  
cis_by_pat <- ma_data_by_pat %>%
    group_by(patient_argument_type) %>%
    summarize(mean = mean(d_calc),
            sd = sd(d_calc),
            n = n()) %>%
    mutate(ci_range_95 =  1.96 * (sd/sqrt(n)),
         ci_lower = mean - ci_range_95,
         ci_upper = mean + ci_range_95)

ggplot(data=ma_data_by_pat, aes(x = patient_argument_type, 
                    y = d_calc, 
                    color = patient_argument_type)) +
  geom_violin() +
  geom_point(alpha = .4)  +
  ylab("Effect Size") +
  xlab("Patient Argument Type") +
  ggtitle("SB effect size by patient argument type") +
  geom_errorbar(data = cis_by_pat, 
                  aes(x = patient_argument_type,
                      y = mean, ymin = ci_lower, 
                      ymax = ci_upper), 
                  color = "black") +
  geom_pointrange(data = cis_by_pat, 
                  aes(x = patient_argument_type,
                      y = mean, ymin = ci_lower, 
                      ymax = ci_upper), 
                  color = "black") +
  facet_wrap(~agent_argument_type) +
  geom_hline(aes(yintercept = 0), linetype = 2) +
  theme_classic(base_size = 16) +
  theme(legend.position = "none")

summary

tree plot

ma_model <- rma(ma_data$d_calc, ma_data$d_var_calc)
ma_model

## 
## Random-Effects Model (k = 96; tau^2 estimator: REML)
## 
## tau^2 (estimated amount of total heterogeneity): 1.5517 (SE = 0.2514)
## tau (square root of estimated tau^2 value):      1.2457
## I^2 (total heterogeneity / total variability):   93.52%
## H^2 (total variability / sampling variability):  15.44
## 
## Test for Heterogeneity:
## Q(df = 95) = 700.5932, p-val < .0001
## 
## Model Results:
## 
## estimate      se    zval    pval   ci.lb   ci.ub 
##   0.4206  0.1349  3.1176  0.0018  0.1562  0.6850  ** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

forest(ma_model,
       header = T,
       slab = ma_data$unique_id,
       col = "red",
       cex = .7)

Funnel plot

ma_data %>% 
  mutate(color = ifelse(sentence_structure == "transitive", "red", "blue"),
         color_sample_size = ifelse(n_1 < 10, "red", ifelse(n_1 < 20, "orange", "yellow")),
         color_confidence = ifelse(inclusion_certainty == 1, "red", "black"))-> ma_data_funnel

ma_data_funnel %>% filter (abs(d_calc) < 5) -> ma_data_funnel_no_outlier
ss_colors <- ma_data_funnel$color
ss_colors_no_outlier <- ma_data_funnel_no_outlier$color 
size_colors <- ma_data_funnel$color_sample_size
size_colors_no_outlier <- ma_data_funnel_no_outlier$color_sample_size
c_colors <- ma_data_funnel$color_confidence
c_colors_no_outlier <- ma_data_funnel_no_outlier$color_confidence

ma_model_funnel <- rma(ma_data_funnel$d_calc, ma_data_funnel$d_var_calc)
ma_model_funnel_no_outlier <- rma(ma_data_funnel_no_outlier$d_calc, ma_data_funnel_no_outlier$d_var_calc)

f1<- funnel(ma_model_funnel, xlab = "Effect Size", col = ss_colors) 
legend("topright",bg = "white",legend = c("transitive","intransitive"),pch=16,col=c("red", "blue"))
title(main = "All effect sizes break down by sentence structure")

f2<- funnel(ma_model_funnel_no_outlier, xlab = "Effect Size", col = ss_colors_no_outlier) 
legend("topright",bg = "white",legend = c("transitive","intransitive"),pch=16,col=c("red", "blue"))
title(main = "effect sizes excluded outliers (abs <5) break down by sentence structure")

f3<- funnel(ma_model_funnel, xlab = "Effect Size", col = size_colors) 
legend("topright",bg = "white",legend = c("small(<10)","medium(10-20)", "large(>20)"),pch=16,col=c("red", "orange", "yellow"))
title(main = "all effect sizes break down by sample size")

f4<- funnel(ma_model_funnel_no_outlier, xlab = "Effect Size", col = size_colors_no_outlier) 
legend("topright",bg = "white",legend = c("small(<10)","medium(10-20)", "large(>20)"),pch=16,col=c("red", "orange", "yellow"))
title(main = "effect sizes excluded outliers (abs <5) break down by sample size")

f5<- funnel(ma_model_funnel, xlab = "Effect Size", col = c_colors) 
legend("topright",bg = "white",legend = c("weird ones","normal ones"),pch=16,col=c("red", "black"))
title(main = "all effect sizes break down by confidence")

f6<- funnel(ma_model_funnel_no_outlier, xlab = "Effect Size", col = c_colors_no_outlier) 
legend("topright",bg = "white",legend = c("weird ones","normal ones"),pch=16,col=c("red", "black"))
title(main = "effect sizes excluded outliers (abs <5) break down by confidence")

sb DRAFT-everything