complete_predictors <- read_csv("complete_predictors.csv") %>%
mutate(kuperman_minus_wordbank = KupermanAoA - wordbank_aoa_years)
full_predictor_list <- c("preschoolness","helpfulness","childes_adult_log_freq","concreteness")
write_model_formula <- function(outcome_var) {
fmla_full <- as.formula(paste(as.character(outcome_var), paste(" ~ "), paste(full_predictor_list, collapse="+")))
}
Zero-order correlations among predictors & outcomes
data_forcorr <- complete_predictors %>%
select(KupermanAoA, wordbank_aoa = wordbank_aoa_years,
picture_naming_aoa = morrison_aoa_threshold_years, babiness, preschoolness, helpfulness,
frequency = childes_adult_log_freq, concreteness, generality = mean_generality)
corrs <- cor(data_forcorr, use="pairwise.complete.obs",method="pearson")
pmat <- cor.mtest(data_forcorr, method="pearson")
pval <- pmat$p
corrplot(corrs, method="color", type="lower", addCoef.col = TRUE,
tl.col="black", diag = FALSE, p.mat=pval, sig.level=.05, insig="blank",number.cex = .7)

Note: although generality is correlated with other variables, it wasn’t a significant predictor in any models tested (raw AoAs or differences between different AoA measures), so it’s not included in any further analyses here.
Get a sense of what words are most different for each measure
Wordbank/Kuperman
ggplot(complete_predictors, aes(x = wordbank_aoa_years, y = KupermanAoA, label=word))+
geom_point()+
geom_text_repel(segment.alpha=.3, size=3)+
geom_abline(slope=1, intercept=0)+
theme_classic()+
xlim(0,6)+
ylim(0,10)+
labs(x = "Wordbank AoA")

Picture-naming/Kuperman
ggplot(complete_predictors, aes(x = morrison_aoa_threshold_years, y = KupermanAoA, label=word))+
geom_point()+
geom_text_repel(segment.alpha=.3, size=3)+
geom_abline(slope=1, intercept=0)+
theme_classic()+
xlim(0,10)+
ylim(0,10)+
labs(x = "Picture-naming AoA")

Picture-naming/Wordbank
ggplot(complete_predictors, aes(x = morrison_aoa_threshold_years, y = wordbank_aoa_years, label=word))+
geom_point()+
geom_text_repel(segment.alpha=.3, size=3)+
geom_abline(slope=1, intercept=0)+
theme_classic()+
xlim(0,6)+
ylim(0,10)+
labs(x = "Picture-naming AoA", y = "Wordbank AoA")

Get a sense of difference distributions
Wordbank/Kuperman
hist(complete_predictors$kuperman_minus_wordbank)

Kuperman/picture-naming
hist(complete_predictors$kuperman_minus_pn)

Wordbank/picture-naming
hist(complete_predictors$wordbank_minus_pn)

Predict Kuperman-Wordbank
k_pr_full_fm <- write_model_formula("kuperman_minus_wordbank")
k_pr_full <- lm(k_pr_full_fm, complete_predictors)
k_pr_preschoolness <- lm(kuperman_minus_wordbank ~ preschoolness, complete_predictors)
k_pr_helpfulness <- lm(kuperman_minus_wordbank ~ helpfulness, complete_predictors)
k_pr_frequency <- lm(kuperman_minus_wordbank ~ childes_adult_log_freq, complete_predictors)
k_pr_concreteness <- lm(kuperman_minus_wordbank ~ concreteness, complete_predictors)
tab_model(k_pr_full, k_pr_preschoolness, k_pr_helpfulness, k_pr_frequency, k_pr_concreteness)
|
Â
|
kuperman minus wordbank
|
kuperman minus wordbank
|
kuperman minus wordbank
|
kuperman minus wordbank
|
kuperman minus wordbank
|
|
Predictors
|
Estimates
|
CI
|
p
|
Estimates
|
CI
|
p
|
Estimates
|
CI
|
p
|
Estimates
|
CI
|
p
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
2.31
|
-3.91 – 8.53
|
0.464
|
3.21
|
2.77 – 3.66
|
<0.001
|
2.79
|
2.01 – 3.57
|
<0.001
|
3.94
|
3.07 – 4.82
|
<0.001
|
1.23
|
-5.59 – 8.05
|
0.721
|
|
preschoolness
|
-0.32
|
-0.52 – -0.13
|
0.001
|
-0.40
|
-0.59 – -0.22
|
<0.001
|
|
|
|
|
|
|
|
|
|
|
helpfulness
|
-0.11
|
-0.33 – 0.11
|
0.330
|
|
|
|
-0.15
|
-0.39 – 0.08
|
0.195
|
|
|
|
|
|
|
|
childes_adult_log_freq
|
-0.15
|
-0.28 – -0.03
|
0.016
|
|
|
|
|
|
|
-0.23
|
-0.35 – -0.11
|
<0.001
|
|
|
|
|
concreteness
|
0.44
|
-0.83 – 1.72
|
0.490
|
|
|
|
|
|
|
|
|
|
0.21
|
-1.18 – 1.61
|
0.761
|
|
Observations
|
115
|
115
|
115
|
115
|
115
|
|
R2 / R2 adjusted
|
0.202 / 0.173
|
0.142 / 0.135
|
0.015 / 0.006
|
0.113 / 0.105
|
0.001 / -0.008
|
# run models to produce residuals, to then correlate with missing predictor. e.g. k_pr_no_preschoolness will let us correlate preschoolness with kuperman_minus_wordbank, controlling for frequency
k_pr_no_preschoolness <- lm(kuperman_minus_wordbank ~ childes_adult_log_freq, complete_predictors)
# k_pr_no_helpfulness <- lm(kuperman_minus_wordbank ~ concreteness + preschoolness + childes_adult_log_freq, complete_predictors)
k_pr_no_frequency <- lm(kuperman_minus_wordbank ~ preschoolness, complete_predictors)
# k_pr_no_concreteness <- lm(kuperman_minus_wordbank ~ preschoolness + helpfulness + childes_adult_log_freq, complete_predictors)
k_pr_predictors <- complete_predictors %>%
select(num_item_id, word, KupermanAoA, parentreport_calculated_aoa_years, kuperman_minus_wordbank, preschoolness,
helpfulness, concreteness, childes_adult_log_freq) %>%
mutate(k_pr_no_preschoolness.resid = k_pr_no_preschoolness$resid,
k_pr_no_frequency.resid = k_pr_no_frequency$resid)
Plot Kuperman-Wordbank predictors individually
Preschoolness (r = -.28)
ggplot(k_pr_predictors, aes(x = preschoolness, y = k_pr_no_preschoolness.resid, label=word))+
geom_point()+
geom_text_repel(size = 3, alpha = .6, segment.alpha = .3)+
geom_smooth(method="lm")+
theme_classic()+
labs(x = "Preschoolness", y = "Kuperman minus Wordbank, controlling for freq.")

Frequency (r = -.23)
ggplot(k_pr_predictors, aes(x = childes_adult_log_freq, y = k_pr_no_frequency.resid, label=word))+
geom_point()+
geom_text_repel(size = 3, alpha = .6, segment.alpha = .3)+
geom_smooth(method="lm")+
theme_classic()+
labs(x = "Frequency", y = "Kuperman minus Wordbank, controlling for presch.")

Predict Kuperman - Picture-naming
k_pn_full_fm <- write_model_formula("kuperman_minus_pn")
k_pn_full <- lm(k_pn_full_fm, complete_predictors)
k_pn_preschoolness <- lm(kuperman_minus_pn ~ preschoolness, complete_predictors)
k_pn_helpfulness <- lm(kuperman_minus_pn ~ helpfulness, complete_predictors)
k_pn_frequency <- lm(kuperman_minus_pn ~ childes_adult_log_freq, complete_predictors)
k_pn_concreteness <- lm(kuperman_minus_pn ~ concreteness, complete_predictors)
tab_model(k_pn_full, k_pn_preschoolness, k_pn_helpfulness, k_pn_frequency, k_pn_concreteness)
|
Â
|
kuperman minus pn
|
kuperman minus pn
|
kuperman minus pn
|
kuperman minus pn
|
kuperman minus pn
|
|
Predictors
|
Estimates
|
CI
|
p
|
Estimates
|
CI
|
p
|
Estimates
|
CI
|
p
|
Estimates
|
CI
|
p
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
-13.38
|
-25.61 – -1.15
|
0.032
|
2.10
|
1.14 – 3.05
|
<0.001
|
2.83
|
1.26 – 4.40
|
0.001
|
-1.32
|
-3.16 – 0.53
|
0.161
|
-10.72
|
-24.46 – 3.03
|
0.125
|
|
preschoolness
|
-0.77
|
-1.15 – -0.39
|
<0.001
|
-0.49
|
-0.88 – -0.09
|
0.017
|
|
|
|
|
|
|
|
|
|
|
helpfulness
|
-0.73
|
-1.16 – -0.31
|
0.001
|
|
|
|
-0.56
|
-1.03 – -0.09
|
0.019
|
|
|
|
|
|
|
|
childes_adult_log_freq
|
0.54
|
0.29 – 0.79
|
<0.001
|
|
|
|
|
|
|
0.32
|
0.07 – 0.58
|
0.014
|
|
|
|
|
concreteness
|
3.00
|
0.50 – 5.50
|
0.019
|
|
|
|
|
|
|
|
|
|
2.39
|
-0.42 – 5.20
|
0.095
|
|
Observations
|
115
|
115
|
115
|
115
|
115
|
|
R2 / R2 adjusted
|
0.258 / 0.232
|
0.050 / 0.041
|
0.047 / 0.039
|
0.052 / 0.044
|
0.025 / 0.016
|
# run models to produce residuals, to then correlate with missing predictor. e.g. k_pn_no_preschoolness will let us correlate preschoolness with kuperman_minus_pn, controlling for frequency, and helpfulness
k_pn_no_preschoolness <- lm(kuperman_minus_pn ~ helpfulness + childes_adult_log_freq, complete_predictors)
k_pn_no_helpfulness <- lm(kuperman_minus_pn ~ preschoolness + childes_adult_log_freq, complete_predictors)
k_pn_no_frequency <- lm(kuperman_minus_pn ~ helpfulness + preschoolness, complete_predictors)
# will not run this model for this DV because it wasn't predictive in the full model - not enough variability
#k_pn_no_concreteness <- lm(kuperman_minus_pn ~ preschoolness + helpfulness + childes_adult_log_freq, complete_predictors)
k_pn_predictors <- complete_predictors %>%
select(num_item_id, word, KupermanAoA, morrison_aoa_threshold_years, kuperman_minus_pn, preschoolness,
helpfulness, concreteness, childes_adult_log_freq) %>%
mutate(k_pn_no_preschoolness.resid = k_pn_no_preschoolness$resid,
k_pn_no_helpfulness.resid = k_pn_no_helpfulness$resid,
k_pn_no_frequency.resid = k_pn_no_frequency$resid)
Plot Kuperman - Picture naming predictors individually
Preschoolness (r = -.24)
ggplot(k_pn_predictors, aes(x = preschoolness, y = k_pn_no_preschoolness.resid, label=word))+
geom_point()+
geom_text_repel(size = 3, alpha = .6, segment.alpha = .3)+
geom_smooth(method="lm")+
theme_classic()+
labs(x = "Preschoolness", y = "Kuperman minus picture-naming, controlling for freq./helpf.")

Helpfulness (r = -.19)
ggplot(k_pn_predictors, aes(x = helpfulness, y = k_pn_no_helpfulness.resid, label=word))+
geom_point()+
geom_text_repel(size = 3, alpha = .6, segment.alpha = .3)+
geom_smooth(method="lm")+
theme_classic()+
labs(x = "Helpfulness", y = "Kuperman minus picture-naming, controlling for freq./presch.")

Frequency (r = .34)
ggplot(k_pn_predictors, aes(x = childes_adult_log_freq, y = k_pn_no_frequency.resid, label=word))+
geom_point()+
geom_text_repel(size = 3, alpha = .6, segment.alpha = .3)+
geom_smooth(method="lm")+
theme_classic()+
labs(x = "Frequency", y = "Kuperman minus picture-naming, controlling for presch./helpf.")

Predict Wordbank - picture naming
pr_pn_full_fm <- write_model_formula("wordbank_minus_pn")
pr_pn_full <- lm(pr_pn_full_fm, complete_predictors)
pr_pn_preschoolness <- lm(wordbank_minus_pn ~ preschoolness, complete_predictors)
pr_pn_helpfulness <- lm(wordbank_minus_pn ~ helpfulness, complete_predictors)
pr_pn_frequency <- lm(wordbank_minus_pn ~ childes_adult_log_freq, complete_predictors)
pr_pn_concreteness <- lm(wordbank_minus_pn ~ concreteness, complete_predictors)
tab_model(pr_pn_full, pr_pn_preschoolness, pr_pn_helpfulness, pr_pn_frequency, pr_pn_concreteness)
|
Â
|
wordbank minus pn
|
wordbank minus pn
|
wordbank minus pn
|
wordbank minus pn
|
wordbank minus pn
|
|
Predictors
|
Estimates
|
CI
|
p
|
Estimates
|
CI
|
p
|
Estimates
|
CI
|
p
|
Estimates
|
CI
|
p
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
-15.69
|
-27.37 – -4.01
|
0.009
|
-1.11
|
-2.06 – -0.16
|
0.022
|
0.04
|
-1.50 – 1.57
|
0.961
|
-5.26
|
-6.94 – -3.58
|
<0.001
|
-11.95
|
-25.30 – 1.40
|
0.079
|
|
preschoolness
|
-0.44
|
-0.80 – -0.08
|
0.016
|
-0.08
|
-0.48 – 0.31
|
0.679
|
|
|
|
|
|
|
|
|
|
|
helpfulness
|
-0.63
|
-1.04 – -0.22
|
0.003
|
|
|
|
-0.41
|
-0.87 – 0.05
|
0.082
|
|
|
|
|
|
|
|
childes_adult_log_freq
|
0.69
|
0.46 – 0.93
|
<0.001
|
|
|
|
|
|
|
0.55
|
0.32 – 0.79
|
<0.001
|
|
|
|
|
concreteness
|
2.55
|
0.17 – 4.94
|
0.036
|
|
|
|
|
|
|
|
|
|
2.17
|
-0.55 – 4.90
|
0.117
|
|
Observations
|
115
|
115
|
115
|
115
|
115
|
|
R2 / R2 adjusted
|
0.281 / 0.254
|
0.002 / -0.007
|
0.027 / 0.018
|
0.164 / 0.157
|
0.022 / 0.013
|
# run models to produce residuals, to then correlate with missing predictor. e.g. pr_pn_no_helpfulness will let us correlate helpfulness with pn_minus_parentreport, controlling for frequency & concreteness
#did not run preschoolness model because wasn't significant in full model above
#pr_pn_no_preschoolness <- lm(pn_minus_parentreport ~ concreteness + helpfulness + childes_adult_log_freq, complete_predictors)
pr_pn_no_helpfulness <- lm(wordbank_minus_pn ~ childes_adult_log_freq, complete_predictors)
pr_pn_no_frequency <- lm(wordbank_minus_pn ~ helpfulness, complete_predictors)
#pr_pn_no_concreteness <- lm(wordbank_minus_pn ~ helpfulness + childes_adult_log_freq, complete_predictors)
pr_pn_predictors <- complete_predictors %>%
select(num_item_id, word, morrison_aoa_threshold_years, wordbank_aoa_years, parent_minus_pn, preschoolness,
helpfulness, concreteness, childes_adult_log_freq) %>%
mutate(pr_pn_no_helpfulness.resid = pr_pn_no_helpfulness$resid,
pr_pn_no_frequency.resid = pr_pn_no_frequency$resid)
Plot Wordbank - picture-naming predictors individually
Helpfulness (r = -.24)
ggplot(pr_pn_predictors, aes(x = helpfulness, y = pr_pn_no_helpfulness.resid, label=word))+
geom_point()+
geom_text_repel(size = 3, alpha = .6, segment.alpha = .3)+
geom_smooth(method="lm")+
theme_classic()+
labs(x = "Helpfulness", y = "Wordbank minus Picture-naming, controlling for freq.")

Frequency (r = .44)
ggplot(pr_pn_predictors, aes(x = childes_adult_log_freq, y = pr_pn_no_frequency.resid, label=word))+
geom_point()+
geom_text_repel(size = 3, alpha = .6, segment.alpha = .3)+
geom_smooth(method="lm")+
theme_classic()+
labs(x = "Frequency", y = "Wordbank minus Picture-naming, controlling for helpf.")

Compare strength of predictors for different outcomes
Raw AoA values
kuperman_aoa <- lm(KupermanAoA ~ preschoolness + concreteness + helpfulness + childes_adult_log_freq, data = complete_predictors)
kuperman_aoa_plot <- tidy(kuperman_aoa) %>%
mutate(model = "Kuperman AoA, R2 = .33")
pn_aoa <- lm(morrison_aoa_threshold_years ~ preschoolness + concreteness + helpfulness + childes_adult_log_freq, data = complete_predictors)
pn_aoa_plot <- tidy(pn_aoa) %>%
mutate(model = "Picture-naming AoA, R2 = .34")
pr_aoa <- lm(wordbank_aoa_years ~ preschoolness + concreteness + helpfulness + childes_adult_log_freq, data = complete_predictors)
pr_aoa_plot <- tidy(pr_aoa) %>%
mutate(model = "Wordbank AoA, R2 = .55")
raw_plot <- bind_rows(kuperman_aoa_plot, pn_aoa_plot, pr_aoa_plot)
dwplot(raw_plot,
vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2)) %>%
relabel_predictors(childes_adult_log_freq = "frequency") +
theme_bw() + xlab("Coefficient Estimate") + ylab("") +
geom_vline(xintercept = 0, colour = "grey60", linetype = 2) +
ggtitle("Predicting AoA estimates (N = 115)") +
theme(plot.title = element_text(face="bold"),
legend.position = c(0.007, 0.01),
legend.justification = c(0, 0),
legend.background = element_rect(colour="grey80"),
legend.title = element_blank())

Raw differences between AoAs
k_pn_plot <- tidy(k_pn_full) %>%
mutate(model = "kup - picture, R2 = .23") %>%
arrange(term)
k_pr_plot <- tidy(k_pr_full) %>%
mutate(model = "kup - wordbank, R2 = .17") %>%
arrange(term)
pr_pn_plot <- tidy(pr_pn_full) %>%
mutate(model = "wordbank - picture, R2 = .25") %>%
arrange(term)
differences_plot <- bind_rows(k_pn_plot, k_pr_plot, pr_pn_plot)
dwplot(differences_plot,
vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2)) %>%
relabel_predictors(childes_adult_log_freq = "frequency") +
theme_bw() + xlab("Coefficient Estimate") + ylab("") +
geom_vline(xintercept = 0, colour = "grey60", linetype = 2) +
ggtitle("Predicting AoA differences (N = 115)") +
theme(plot.title = element_text(face="bold"),
legend.position = c(0.65, 0.01),
legend.justification = c(0, 0),
legend.background = element_rect(colour="grey80"),
legend.title = element_blank())
