# Location of the tidied effect-size data, built with here().
DATA_PATH <- here("data/processed/syntactic_bootstrapping_tidy_data.csv")

# Keep only rows for English, typically developing samples tested with video
# stimuli; the analyses below all start from this subset.
ma_data <- filter(
  read_csv(DATA_PATH),
  language == "English" &
    population_type == "typically_developing" &
    stimuli_modality == "video"
)

# Number of rows with a non-missing effect size (d_calc).
n_effect_sizes <- sum(!is.na(ma_data$d_calc))

# Number of distinct values of unique_id (one per paper).
n_papers <- length(unique(ma_data$unique_id))
There are 63 effect sizes collected from 17 different papers.
Here are the papers in this analysis:
# Interactive table of how many rows each paper (short_cite) contributes,
# largest counts first.
ma_data %>%
  count(short_cite, sort = TRUE) %>%
  DT::datatable()
Mean age is missing for one effect size.
# Names of the continuous columns whose distributions are inspected below.
CONTINUOUS_VARS <- c("n_1", "x_1", "sd_1", "d_calc", "d_var_calc", "mean_age")

# Long format: the selected columns are stacked into `name` (column name) and
# `value` (its value). all_of() makes it explicit that CONTINUOUS_VARS is an
# external character vector rather than a column of ma_data, and errors if
# any listed column is missing.
long_continuous <- ma_data %>%
  pivot_longer(cols = all_of(CONTINUOUS_VARS))
# One histogram per continuous measure. Each panel gets its own x-axis range
# because the measures are on very different scales.
long_continuous %>%
  ggplot(aes(x = value)) +
  # bins = 30 is the geom_histogram() default, stated explicitly so the
  # binning choice is visible.
  geom_histogram(bins = 30) +
  # `scales` spelled out in full rather than relying on partial matching.
  facet_wrap(~ name, scales = "free_x") +
  labs(title = "Distribution of continuous measures")
# Mean and standard deviation of each continuous measure.
# na.rm = TRUE is needed because a single missing value (e.g. in mean_age)
# otherwise turns the whole summary for that measure into NA.
long_continuous %>%
  group_by(name) %>%
  summarize(mean = mean(value, na.rm = TRUE),
            sd = sd(value, na.rm = TRUE)) %>%
  kable()
| name | mean | sd |
|---|---|---|
| d_calc | 0.4488370 | 2.2721814 |
| d_var_calc | 0.3400485 | 0.6908566 |
| mean_age | NA | NA |
| n_1 | 13.6825397 | 5.7720835 |
| sd_1 | 0.1075735 | 0.0737094 |
| x_1 | 0.5384772 | 0.1222712 |
# Names of the categorical moderator columns to tabulate.
CATEGORICAL_VARS <- c("sentence_structure", "language", "population_type",
                      "agent_argument_type", "patient_argument_type",
                      "stimuli_type", "stimuli_modality", "presentation_type",
                      "character_identification", "test_mass_or_distributed",
                      "practice_phase")

# Stack the categorical columns into name/value pairs, then count how often
# each level occurs for each variable (result column: n).
# all_of() marks CATEGORICAL_VARS as an external character vector and errors
# if any listed column is missing.
long_categorical <- ma_data %>%
  pivot_longer(cols = all_of(CATEGORICAL_VARS)) %>%
  count(name, value)
# Bar chart of level counts, one panel per categorical variable. The x-axis
# is freed per panel because each variable has its own set of levels.
long_categorical %>%
  ggplot(aes(x = value, y = n)) +
  # `scales` spelled out in full rather than relying on partial matching.
  facet_wrap(~ name, scales = "free_x") +
  geom_col(position = "dodge", width = 0.4) +
  theme(text = element_text(size = 8),
        # Rotate the x-axis labels so long level names stay readable.
        axis.text.x = element_text(angle = 90, hjust = 1))
# Baseline model: no moderators, with a random intercept for each paper
# (short_cite).
m1 <- rma.mv(
  yi = d_calc,
  V = d_var_calc,
  random = ~ 1 | short_cite,
  data = ma_data
)
summary(m1)
##
## Multivariate Meta-Analysis Model (k = 63; method: REML)
##
## logLik Deviance AIC BIC AICc
## -200.5332 401.0665 405.0665 409.3207 405.2699
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2 0.5904 0.7684 17 no short_cite
##
## Test for Heterogeneity:
## Q(df = 62) = 559.8747, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## 0.5485 0.1927 2.8470 0.0044 0.1709 0.9262 **
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Conversion factor and age cutoff as named constants, following the script's
# convention of upper-case names for values that might change.
DAYS_PER_MONTH <- 30.44
YOUNG_MAX_AGE_MONTHS <- 36

# Subset of effect sizes whose mean age, converted to months, is below the
# cutoff. filter() also drops rows where mean_age (and so age_months) is NA.
ma_data_young_only <- ma_data %>%
  mutate(age_months = mean_age / DAYS_PER_MONTH) %>%
  filter(age_months < YOUNG_MAX_AGE_MONTHS)
# Baseline (no-moderator) model refitted on the younger-than-36-months
# subset, with a random intercept for each paper.
m_young <- rma.mv(
  yi = d_calc,
  V = d_var_calc,
  random = ~ 1 | short_cite,
  data = ma_data_young_only
)
summary(m_young)
##
## Multivariate Meta-Analysis Model (k = 54; method: REML)
##
## logLik Deviance AIC BIC AICc
## -157.3340 314.6681 318.6681 322.6087 318.9081
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2 0.6028 0.7764 15 no short_cite
##
## Test for Heterogeneity:
## Q(df = 53) = 448.4174, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## 0.5159 0.2075 2.4865 0.0129 0.1092 0.9225 *
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Effect size against age, with point size scaled by sample size (n_1).
# The x aesthetic converts mean_age to months (mean_age / 30.44), so the
# axis is labelled in months to match the title.
ma_data %>%
  ggplot(aes(x = mean_age / 30.44, y = d_calc, size = n_1)) +
  geom_point() +
  # Linear fit (default colour) plus the default smoother in red.
  geom_smooth(method = "lm") +
  geom_smooth(color = "red") +
  ylab("Effect Size") +
  xlab("Age (months)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (months)") +
  theme(legend.position = "none")
# Meta-regression with mean_age as the only moderator and a random intercept
# for each paper. mean_age is used untransformed here, not in months.
m_age <- rma.mv(
  yi = d_calc ~ mean_age,
  V = d_var_calc,
  random = ~ 1 | short_cite,
  data = ma_data
)
summary(m_age)
##
## Multivariate Meta-Analysis Model (k = 62; method: REML)
##
## logLik Deviance AIC BIC AICc
## -186.7742 373.5485 379.5485 385.8315 379.9770
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2 0.6391 0.7994 17 no short_cite
##
## Test for Residual Heterogeneity:
## QE(df = 60) = 533.1630, p-val < .0001
##
## Test of Moderators (coefficient 2):
## QM(df = 1) = 8.1015, p-val = 0.0044
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## intrcpt -0.3883 0.3899 -0.9958 0.3193 -1.1525 0.3759
## mean_age 0.0011 0.0004 2.8463 0.0044 0.0003 0.0019 **
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Let’s only look at effect sizes for children younger than 36 months.
# Effect size against age for the younger-than-36-months subset, with point
# size scaled by sample size (n_1). The x aesthetic is age_months, so the
# axis label and title both say months.
ma_data_young_only %>%
  ggplot(aes(x = age_months, y = d_calc, size = n_1)) +
  geom_point() +
  # Linear fit (default colour) plus the default smoother in red.
  geom_smooth(method = "lm") +
  geom_smooth(color = "red") +
  ylab("Effect Size") +
  xlab("Age (months)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (months)") +
  theme(legend.position = "none")
# Age meta-regression refitted on the younger-than-36-months subset.
# The moderator is still mean_age, not the age_months column used to build
# the subset.
m_age_young <- rma.mv(
  yi = d_calc ~ mean_age,
  V = d_var_calc,
  random = ~ 1 | short_cite,
  data = ma_data_young_only
)
summary(m_age_young)
##
## Multivariate Meta-Analysis Model (k = 54; method: REML)
##
## logLik Deviance AIC BIC AICc
## -156.3612 312.7224 318.7224 324.5762 319.2224
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2 0.6248 0.7904 15 no short_cite
##
## Test for Residual Heterogeneity:
## QE(df = 52) = 443.1411, p-val < .0001
##
## Test of Moderators (coefficient 2):
## QM(df = 1) = 0.0434, p-val = 0.8349
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## intrcpt 0.3152 0.9877 0.3191 0.7497 -1.6206 2.2509
## mean_age 0.0003 0.0013 0.2084 0.8349 -0.0022 0.0027
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Effect size against age in months, coloured by sentence structure, with a
# separate linear fit per structure. Rows with sentence_structure equal to
# "bare_verb" are excluded from the plot.
ma_data %>%
  filter(sentence_structure != "bare_verb") %>%
  mutate(age_months = mean_age / 30.44) %>%
  # The stray empty argument (", ,") in aes() has been removed.
  ggplot(aes(x = age_months, y = d_calc, size = n_1,
             color = sentence_structure)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ylab("Effect Size") +
  # x is age_months, so label the axis and title in months.
  xlab("Age (months)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (months)")
# Additive meta-regression: mean_age and sentence_structure as moderators,
# with a random intercept for each paper.
m_age_sentence <- rma.mv(
  yi = d_calc ~ mean_age + sentence_structure,
  V = d_var_calc,
  random = ~ 1 | short_cite,
  data = ma_data
)
summary(m_age_sentence)
##
## Multivariate Meta-Analysis Model (k = 62; method: REML)
##
## logLik Deviance AIC BIC AICc
## -172.8666 345.7332 353.7332 362.0434 354.4739
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2 0.7409 0.8607 17 no short_cite
##
## Test for Residual Heterogeneity:
## QE(df = 59) = 524.8546, p-val < .0001
##
## Test of Moderators (coefficients 2:3):
## QM(df = 2) = 36.0580, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb
## intrcpt -0.6222 0.4042 -1.5393 0.1237 -1.4144
## mean_age 0.0009 0.0004 2.3228 0.0202 0.0001
## sentence_structuretransitive 0.6163 0.1169 5.2714 <.0001 0.3872
## ci.ub
## intrcpt 0.1700
## mean_age 0.0017 *
## sentence_structuretransitive 0.8455 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Interaction:
# Same moderators as the additive model, plus their interaction
# (mean_age * sentence_structure).
# NOTE(review): this reuses the name m_age_sentence and therefore overwrites
# the additive model fitted earlier; the other interaction model in this
# script is stored under a separate name (m_age_test_int).
m_age_sentence <- rma.mv(
  yi = d_calc ~ mean_age * sentence_structure,
  V = d_var_calc,
  random = ~ 1 | short_cite,
  data = ma_data
)
summary(m_age_sentence)
##
## Multivariate Meta-Analysis Model (k = 62; method: REML)
##
## logLik Deviance AIC BIC AICc
## -171.9204 343.8407 353.8407 364.1430 354.9946
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2 0.7469 0.8642 17 no short_cite
##
## Test for Residual Heterogeneity:
## QE(df = 58) = 524.8480, p-val < .0001
##
## Test of Moderators (coefficients 2:4):
## QM(df = 3) = 36.9478, p-val < .0001
##
## Model Results:
##
## estimate se zval pval
## intrcpt -1.3139 0.8450 -1.5550 0.1199
## mean_age 0.0018 0.0011 1.7339 0.0829
## sentence_structuretransitive 1.3364 0.7824 1.7081 0.0876
## mean_age:sentence_structuretransitive -0.0009 0.0010 -0.9305 0.3521
## ci.lb ci.ub
## intrcpt -2.9700 0.3422
## mean_age -0.0002 0.0039 .
## sentence_structuretransitive -0.1970 2.8698 .
## mean_age:sentence_structuretransitive -0.0029 0.0010
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Rebuild the younger-than-36-months subset: mean_age / 30.44 gives age in
# months, and filter() also drops rows where that value is NA.
ma_data_young_only <- ma_data %>%
  mutate(age_months = mean_age / 30.44) %>%
  filter(age_months < 36)

# Effect size against age in months for the young subset, coloured by
# sentence structure, with a separate linear fit per structure. Rows with
# sentence_structure equal to "bare_verb" are excluded from the plot.
ma_data_young_only %>%
  filter(sentence_structure != "bare_verb") %>%
  # The stray empty argument (", ,") in aes() has been removed.
  ggplot(aes(x = age_months, y = d_calc, size = n_1,
             color = sentence_structure)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ylab("Effect Size") +
  # x is age_months, so label the axis and title in months.
  xlab("Age (months)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (months)")
# Additive age + sentence-structure meta-regression, refitted on the
# younger-than-36-months subset.
m_age_sentence_young <- rma.mv(
  yi = d_calc ~ mean_age + sentence_structure,
  V = d_var_calc,
  random = ~ 1 | short_cite,
  data = ma_data_young_only
)
summary(m_age_sentence_young)
##
## Multivariate Meta-Analysis Model (k = 54; method: REML)
##
## logLik Deviance AIC BIC AICc
## -143.1808 286.3617 294.3617 302.0890 295.2312
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2 0.6921 0.8319 15 no short_cite
##
## Test for Residual Heterogeneity:
## QE(df = 51) = 431.5026, p-val < .0001
##
## Test of Moderators (coefficients 2:3):
## QM(df = 2) = 26.3325, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb
## intrcpt 0.8651 1.0097 0.8568 0.3916 -1.1139
## mean_age -0.0009 0.0013 -0.7085 0.4787 -0.0034
## sentence_structuretransitive 0.6097 0.1189 5.1258 <.0001 0.3765
## ci.ub
## intrcpt 2.8441
## mean_age 0.0016
## sentence_structuretransitive 0.8428 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Effect size against mean_age, coloured by character_identification, with
# one linear fit per level.
ggplot(
  ma_data,
  aes(x = mean_age, y = d_calc, color = character_identification)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(x = "Age (days)", y = "Effect Size") +
  theme_classic()
# Meta-regression with mean_age and character_identification as moderators
# and a random intercept for each paper.
m_age_char <- rma.mv(
  yi = d_calc ~ mean_age + character_identification,
  V = d_var_calc,
  random = ~ 1 | short_cite,
  data = ma_data
)
summary(m_age_char)
##
## Multivariate Meta-Analysis Model (k = 33; method: REML)
##
## logLik Deviance AIC BIC AICc
## -75.0093 150.0185 158.0185 163.6233 159.6185
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2 0.9654 0.9826 11 no short_cite
##
## Test for Residual Heterogeneity:
## QE(df = 30) = 293.4893, p-val < .0001
##
## Test of Moderators (coefficients 2:3):
## QM(df = 2) = 6.3893, p-val = 0.0410
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## intrcpt -0.6467 0.6567 -0.9847 0.3248 -1.9339 0.6405
## mean_age 0.0010 0.0004 2.4136 0.0158 0.0002 0.0019
## character_identificationyes 0.5846 0.6292 0.9292 0.3528 -0.6486 1.8178
##
## intrcpt
## mean_age *
## character_identificationyes
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Effect size against mean_age, coloured by practice_phase, with one linear
# fit per level.
ggplot(
  ma_data,
  aes(x = mean_age, y = d_calc, color = practice_phase)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(x = "Age (days)", y = "Effect Size") +
  theme_classic()
# Meta-regression with mean_age and practice_phase as moderators and a
# random intercept for each paper.
m_age_practice <- rma.mv(
  yi = d_calc ~ mean_age + practice_phase,
  V = d_var_calc,
  random = ~ 1 | short_cite,
  data = ma_data
)
summary(m_age_practice)
##
## Multivariate Meta-Analysis Model (k = 40; method: REML)
##
## logLik Deviance AIC BIC AICc
## -95.0446 190.0892 198.0892 204.5329 199.3392
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2 1.0053 1.0026 11 no short_cite
##
## Test for Residual Heterogeneity:
## QE(df = 37) = 343.8912, p-val < .0001
##
## Test of Moderators (coefficients 2:3):
## QM(df = 2) = 6.9090, p-val = 0.0316
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## intrcpt -0.8482 0.8814 -0.9624 0.3359 -2.5758 0.8793
## mean_age 0.0011 0.0004 2.5977 0.0094 0.0003 0.0020 **
## practice_phaseyes 0.6834 0.8101 0.8436 0.3989 -0.9043 2.2712
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Effect size against mean_age, coloured by test_mass_or_distributed, with
# one linear fit per level.
ggplot(
  ma_data,
  aes(x = mean_age, y = d_calc, color = test_mass_or_distributed)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(x = "Age (days)", y = "Effect Size") +
  theme_classic()
# Additive meta-regression with mean_age and test_mass_or_distributed as
# moderators and a random intercept for each paper.
m_age_test <- rma.mv(
  yi = d_calc ~ mean_age + test_mass_or_distributed,
  V = d_var_calc,
  random = ~ 1 | short_cite,
  data = ma_data
)
summary(m_age_test)
##
## Multivariate Meta-Analysis Model (k = 62; method: REML)
##
## logLik Deviance AIC BIC AICc
## -184.9207 369.8413 377.8413 386.1515 378.5821
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2 0.6459 0.8037 17 no short_cite
##
## Test for Residual Heterogeneity:
## QE(df = 59) = 520.6656, p-val < .0001
##
## Test of Moderators (coefficients 2:3):
## QM(df = 2) = 9.0675, p-val = 0.0107
##
## Model Results:
##
## estimate se zval pval ci.lb
## intrcpt -0.5649 0.4298 -1.3142 0.1888 -1.4073
## mean_age 0.0012 0.0004 2.9598 0.0031 0.0004
## test_mass_or_distributedmass 0.4369 0.4475 0.9764 0.3289 -0.4401
## ci.ub
## intrcpt 0.2776
## mean_age 0.0019 **
## test_mass_or_distributedmass 1.3140
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Interaction version of the test-structure model: mean_age,
# test_mass_or_distributed, and their product term.
m_age_test_int <- rma.mv(
  yi = d_calc ~ mean_age * test_mass_or_distributed,
  V = d_var_calc,
  random = ~ 1 | short_cite,
  data = ma_data
)
summary(m_age_test_int)
##
## Multivariate Meta-Analysis Model (k = 62; method: REML)
##
## logLik Deviance AIC BIC AICc
## -182.9227 365.8454 375.8454 386.1476 376.9992
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2 0.6380 0.7987 17 no short_cite
##
## Test for Residual Heterogeneity:
## QE(df = 58) = 515.5189, p-val < .0001
##
## Test of Moderators (coefficients 2:4):
## QM(df = 3) = 11.1403, p-val = 0.0110
##
## Model Results:
##
## estimate se zval pval
## intrcpt -0.6627 0.4342 -1.5263 0.1269
## mean_age 0.0013 0.0004 3.1785 0.0015
## test_mass_or_distributedmass 2.7121 1.6401 1.6536 0.0982
## mean_age:test_mass_or_distributedmass -0.0030 0.0021 -1.4415 0.1495
## ci.lb ci.ub
## intrcpt -1.5137 0.1883
## mean_age 0.0005 0.0021 **
## test_mass_or_distributedmass -0.5024 5.9266 .
## mean_age:test_mass_or_distributedmass -0.0071 0.0011
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Add a single productive-vocabulary column: the median where it is
# reported, otherwise the mean, otherwise NA. vocab_source records which of
# the two the value came from.
ma_data_with_vocab <- ma_data %>%
  mutate(
    vocab = coalesce(productive_vocab_median, productive_vocab_mean),
    vocab_source = case_when(
      !is.na(productive_vocab_median) ~ "median",
      !is.na(productive_vocab_mean) ~ "mean",
      TRUE ~ NA_character_
    )
  )
# Combined vocabulary measure against mean_age, coloured by whether the
# value is a median or a mean, with one linear fit per source.
ggplot(
  ma_data_with_vocab,
  aes(x = mean_age, y = vocab, color = vocab_source)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_classic()
# Effect size against median productive vocabulary, with a linear fit.
# Only the median column is plotted here, not the combined vocab column.
ggplot(
  ma_data_with_vocab,
  aes(x = productive_vocab_median, y = d_calc)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_classic()
# Correlation test between mean_age and median productive vocabulary,
# checking how strongly the two candidate moderators overlap.
# The arguments are left as `$` expressions on purpose: cor.test() builds
# the "data:" line of its printed output from the argument text.
cor.test(ma_data_with_vocab$mean_age,
ma_data_with_vocab$productive_vocab_median)
##
## Pearson's product-moment correlation
##
## data: ma_data_with_vocab$mean_age and ma_data_with_vocab$productive_vocab_median
## t = 16.671, df = 28, p-value = 4.524e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9029205 0.9776844
## sample estimates:
## cor
## 0.9531389
# Meta-regression with median productive vocabulary (in place of age) and
# sentence_structure as moderators, with a random intercept for each paper.
m_age_vocab <- rma.mv(
  yi = d_calc ~ productive_vocab_median + sentence_structure,
  V = d_var_calc,
  random = ~ 1 | short_cite,
  data = ma_data
)
summary(m_age_vocab)
##
## Multivariate Meta-Analysis Model (k = 30; method: REML)
##
## logLik Deviance AIC BIC AICc
## -73.8888 147.7776 155.7776 160.9610 157.5958
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2 0.9426 0.9709 6 no short_cite
##
## Test for Residual Heterogeneity:
## QE(df = 27) = 204.9175, p-val < .0001
##
## Test of Moderators (coefficients 2:3):
## QM(df = 2) = 22.5110, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb
## intrcpt 0.4692 0.5463 0.8588 0.3904 -0.6015
## productive_vocab_median -0.0045 0.0073 -0.6197 0.5355 -0.0189
## sentence_structuretransitive 0.9011 0.1904 4.7335 <.0001 0.5280
## ci.ub
## intrcpt 1.5398
## productive_vocab_median 0.0098
## sentence_structuretransitive 1.2741 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Age + sentence-structure model fitted only to rows that report a median
# productive vocabulary.
m_age_vocab_age <- rma.mv(
  yi = d_calc ~ mean_age + sentence_structure,
  V = d_var_calc,
  random = ~ 1 | short_cite,
  data = filter(ma_data, !is.na(productive_vocab_median))
)
summary(m_age_vocab_age)
##
## Multivariate Meta-Analysis Model (k = 30; method: REML)
##
## logLik Deviance AIC BIC AICc
## -72.9358 145.8716 153.8716 159.0550 155.6898
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2 0.8799 0.9380 6 no short_cite
##
## Test for Residual Heterogeneity:
## QE(df = 27) = 198.1789, p-val < .0001
##
## Test of Moderators (coefficients 2:3):
## QM(df = 2) = 23.7496, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb
## intrcpt 2.1872 1.5713 1.3920 0.1639 -0.8924
## mean_age -0.0027 0.0021 -1.2738 0.2027 -0.0068
## sentence_structuretransitive 0.9154 0.1907 4.8010 <.0001 0.5417
## ci.ub
## intrcpt 5.2669
## mean_age 0.0014
## sentence_structuretransitive 1.2892 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Combined model: age plus all four categorical moderators entered
# additively, with a random intercept for each paper.
m_age_mega <- rma.mv(
  yi = d_calc ~ mean_age + test_mass_or_distributed + practice_phase +
    character_identification + sentence_structure,
  V = d_var_calc,
  random = ~ 1 | short_cite,
  data = ma_data
)
summary(m_age_mega)
##
## Multivariate Meta-Analysis Model (k = 26; method: REML)
##
## logLik Deviance AIC BIC AICc
## -53.1003 106.2006 120.2006 127.1707 129.5339
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2 2.1458 1.4649 8 no short_cite
##
## Test for Residual Heterogeneity:
## QE(df = 20) = 221.8624, p-val < .0001
##
## Test of Moderators (coefficients 2:6):
## QM(df = 5) = 14.8364, p-val = 0.0111
##
## Model Results:
##
## estimate se zval pval ci.lb
## intrcpt -2.1083 1.3594 -1.5509 0.1209 -4.7726
## mean_age 0.0011 0.0005 2.5186 0.0118 0.0003
## test_mass_or_distributedmass -0.8569 1.6326 -0.5249 0.5997 -4.0567
## practice_phaseyes 1.2165 1.3295 0.9150 0.3602 -1.3892
## character_identificationyes 1.1810 1.3250 0.8913 0.3728 -1.4160
## sentence_structuretransitive 0.6522 0.2465 2.6452 0.0082 0.1690
## ci.ub
## intrcpt 0.5561
## mean_age 0.0020 *
## test_mass_or_distributedmass 2.3428
## practice_phaseyes 3.8222
## character_identificationyes 3.7780
## sentence_structuretransitive 1.1354 **
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
```