complete_predictors <- read_csv("complete_predictors.csv") %>% 
  mutate(kuperman_minus_wordbank = KupermanAoA - wordbank_aoa_years)

full_predictor_list <- c("preschoolness","helpfulness","childes_adult_log_freq","concreteness")

write_model_formula <- function(outcome_var) {
  fmla_full <- as.formula(paste(as.character(outcome_var), paste(" ~ "), paste(full_predictor_list, collapse="+")))
}

1 Zero-order correlations among predictors & outcomes

data_forcorr <- complete_predictors %>% 
  select(KupermanAoA, wordbank_aoa = wordbank_aoa_years,
         picture_naming_aoa = morrison_aoa_threshold_years, babiness, preschoolness, helpfulness,
         frequency = childes_adult_log_freq, concreteness, generality = mean_generality)

corrs <- cor(data_forcorr, use="pairwise.complete.obs",method="pearson")
pmat <- cor.mtest(data_forcorr, method="pearson")
pval <- pmat$p

corrplot(corrs, method="color", type="lower", addCoef.col = TRUE,
         tl.col="black", diag = FALSE, p.mat=pval, sig.level=.05, insig="blank",number.cex = .7)

Note: although generality is correlated with other variables, it wasn’t a significant predictor in any models tested (raw AoAs or differences between different AoA measures), so it’s not included in any further analyses here.

2 Get a sense of what words are most different for each measure

2.1 Wordbank/Kuperman

ggplot(complete_predictors, aes(x = wordbank_aoa_years, y = KupermanAoA, label=word))+
  geom_point()+
  geom_text_repel(segment.alpha=.3, size=3)+
  geom_abline(slope=1, intercept=0)+
  theme_classic()+
  xlim(0,6)+
  ylim(0,10)+
  labs(x = "Wordbank AoA")

2.2 Picture-naming/Kuperman

ggplot(complete_predictors, aes(x = morrison_aoa_threshold_years, y = KupermanAoA, label=word))+
  geom_point()+
  geom_text_repel(segment.alpha=.3, size=3)+
  geom_abline(slope=1, intercept=0)+
  theme_classic()+
  xlim(0,10)+
  ylim(0,10)+
  labs(x = "Picture-naming AoA")

2.3 Picture-naming/Wordbank

ggplot(complete_predictors, aes(x = morrison_aoa_threshold_years, y = wordbank_aoa_years, label=word))+
  geom_point()+
  geom_text_repel(segment.alpha=.3, size=3)+
  geom_abline(slope=1, intercept=0)+
  theme_classic()+
  xlim(0,6)+
  ylim(0,10)+
  labs(x = "Picture-naming AoA", y = "Wordbank AoA")

3 Get a sense of difference distributions

3.1 Wordbank/Kuperman

hist(complete_predictors$kuperman_minus_wordbank)

3.2 Kuperman/picture-naming

hist(complete_predictors$kuperman_minus_pn)

3.3 Wordbank/picture-naming

hist(complete_predictors$wordbank_minus_pn)

4 Predict Kuperman-Wordbank

k_pr_full_fm <- write_model_formula("kuperman_minus_wordbank")

k_pr_full <- lm(k_pr_full_fm, complete_predictors)
k_pr_preschoolness <- lm(kuperman_minus_wordbank ~ preschoolness, complete_predictors)
k_pr_helpfulness <- lm(kuperman_minus_wordbank ~ helpfulness, complete_predictors)
k_pr_frequency <- lm(kuperman_minus_wordbank ~ childes_adult_log_freq, complete_predictors)
k_pr_concreteness <- lm(kuperman_minus_wordbank ~ concreteness, complete_predictors)

tab_model(k_pr_full, k_pr_preschoolness, k_pr_helpfulness, k_pr_frequency, k_pr_concreteness)
  kuperman minus wordbank kuperman minus wordbank kuperman minus wordbank kuperman minus wordbank kuperman minus wordbank
Predictors Estimates CI p Estimates CI p Estimates CI p Estimates CI p Estimates CI p
(Intercept) 2.31 -3.91 – 8.53 0.464 3.21 2.77 – 3.66 <0.001 2.79 2.01 – 3.57 <0.001 3.94 3.07 – 4.82 <0.001 1.23 -5.59 – 8.05 0.721
preschoolness -0.32 -0.52 – -0.13 0.001 -0.40 -0.59 – -0.22 <0.001
helpfulness -0.11 -0.33 – 0.11 0.330 -0.15 -0.39 – 0.08 0.195
childes_adult_log_freq -0.15 -0.28 – -0.03 0.016 -0.23 -0.35 – -0.11 <0.001
concreteness 0.44 -0.83 – 1.72 0.490 0.21 -1.18 – 1.61 0.761
Observations 115 115 115 115 115
R2 / R2 adjusted 0.202 / 0.173 0.142 / 0.135 0.015 / 0.006 0.113 / 0.105 0.001 / -0.008
# run models to produce residuals, to then correlate with missing predictor. e.g. k_pr_no_preschoolness will let us correlate preschoolness with kuperman_minus_wordbank, controlling for frequency
k_pr_no_preschoolness <- lm(kuperman_minus_wordbank ~ childes_adult_log_freq, complete_predictors)
# k_pr_no_helpfulness <- lm(kuperman_minus_wordbank ~ concreteness + preschoolness + childes_adult_log_freq, complete_predictors)
k_pr_no_frequency <- lm(kuperman_minus_wordbank ~ preschoolness, complete_predictors)
# k_pr_no_concreteness <- lm(kuperman_minus_wordbank ~ preschoolness + helpfulness + childes_adult_log_freq, complete_predictors)

k_pr_predictors <- complete_predictors %>% 
  select(num_item_id, word, KupermanAoA, parentreport_calculated_aoa_years, kuperman_minus_wordbank, preschoolness, 
         helpfulness, concreteness, childes_adult_log_freq) %>% 
  mutate(k_pr_no_preschoolness.resid = k_pr_no_preschoolness$resid,
         k_pr_no_frequency.resid = k_pr_no_frequency$resid)

4.1 Plot Kuperman-Wordbank predictors individually

4.1.1 Preschoolness (r = -.28)

ggplot(k_pr_predictors, aes(x = preschoolness, y = k_pr_no_preschoolness.resid, label=word))+
  geom_point()+
  geom_text_repel(size = 3, alpha = .6, segment.alpha = .3)+
  geom_smooth(method="lm")+
  theme_classic()+
  labs(x = "Preschoolness", y = "Kuperman minus Wordbank, controlling for freq.")

4.1.2 Frequency (r = -.23)

ggplot(k_pr_predictors, aes(x = childes_adult_log_freq, y = k_pr_no_frequency.resid, label=word))+
  geom_point()+
  geom_text_repel(size = 3, alpha = .6, segment.alpha = .3)+
  geom_smooth(method="lm")+
  theme_classic()+
  labs(x = "Frequency", y = "Kuperman minus Wordbank, controlling for presch.")

5 Predict Kuperman - Picture-naming

k_pn_full_fm <- write_model_formula("kuperman_minus_pn")

k_pn_full <- lm(k_pn_full_fm, complete_predictors)
k_pn_preschoolness <- lm(kuperman_minus_pn ~ preschoolness, complete_predictors)
k_pn_helpfulness <- lm(kuperman_minus_pn ~ helpfulness, complete_predictors)
k_pn_frequency <- lm(kuperman_minus_pn ~ childes_adult_log_freq, complete_predictors)
k_pn_concreteness <- lm(kuperman_minus_pn ~ concreteness, complete_predictors)

tab_model(k_pn_full, k_pn_preschoolness, k_pn_helpfulness, k_pn_frequency, k_pn_concreteness)
  kuperman minus pn kuperman minus pn kuperman minus pn kuperman minus pn kuperman minus pn
Predictors Estimates CI p Estimates CI p Estimates CI p Estimates CI p Estimates CI p
(Intercept) -13.38 -25.61 – -1.15 0.032 2.10 1.14 – 3.05 <0.001 2.83 1.26 – 4.40 0.001 -1.32 -3.16 – 0.53 0.161 -10.72 -24.46 – 3.03 0.125
preschoolness -0.77 -1.15 – -0.39 <0.001 -0.49 -0.88 – -0.09 0.017
helpfulness -0.73 -1.16 – -0.31 0.001 -0.56 -1.03 – -0.09 0.019
childes_adult_log_freq 0.54 0.29 – 0.79 <0.001 0.32 0.07 – 0.58 0.014
concreteness 3.00 0.50 – 5.50 0.019 2.39 -0.42 – 5.20 0.095
Observations 115 115 115 115 115
R2 / R2 adjusted 0.258 / 0.232 0.050 / 0.041 0.047 / 0.039 0.052 / 0.044 0.025 / 0.016
# run models to produce residuals, to then correlate with missing predictor. e.g. k_pn_no_preschoolness will let us correlate preschoolness with kuperman_minus_pn, controlling for frequency, and helpfulness
k_pn_no_preschoolness <- lm(kuperman_minus_pn ~ helpfulness + childes_adult_log_freq, complete_predictors)
k_pn_no_helpfulness <- lm(kuperman_minus_pn ~ preschoolness + childes_adult_log_freq, complete_predictors)
k_pn_no_frequency <- lm(kuperman_minus_pn ~ helpfulness + preschoolness, complete_predictors)

# will not run this model for this DV because it wasn't predictive in the full model - not enough variability
#k_pn_no_concreteness <- lm(kuperman_minus_pn ~ preschoolness + helpfulness + childes_adult_log_freq, complete_predictors)

k_pn_predictors <- complete_predictors %>% 
  select(num_item_id, word, KupermanAoA, morrison_aoa_threshold_years, kuperman_minus_pn, preschoolness, 
         helpfulness, concreteness, childes_adult_log_freq) %>% 
  mutate(k_pn_no_preschoolness.resid = k_pn_no_preschoolness$resid,
         k_pn_no_helpfulness.resid = k_pn_no_helpfulness$resid,
         k_pn_no_frequency.resid = k_pn_no_frequency$resid)

5.1 Plot Kuperman - Picture naming predictors individually

5.1.1 Preschoolness (r = -.24)

ggplot(k_pn_predictors, aes(x = preschoolness, y = k_pn_no_preschoolness.resid, label=word))+
  geom_point()+
  geom_text_repel(size = 3, alpha = .6, segment.alpha = .3)+
  geom_smooth(method="lm")+
  theme_classic()+
  labs(x = "Preschoolness", y = "Kuperman minus picture-naming, controlling for freq./helpf.")

5.1.2 Helpfulness (r = -.19)

ggplot(k_pn_predictors, aes(x = helpfulness, y = k_pn_no_helpfulness.resid, label=word))+
  geom_point()+
  geom_text_repel(size = 3, alpha = .6, segment.alpha = .3)+
  geom_smooth(method="lm")+
  theme_classic()+
  labs(x = "Helpfulness", y = "Kuperman minus picture-naming, controlling for freq./presch.")

5.1.3 Frequency (r = .34)

ggplot(k_pn_predictors, aes(x = childes_adult_log_freq, y = k_pn_no_frequency.resid, label=word))+
  geom_point()+
  geom_text_repel(size = 3, alpha = .6, segment.alpha = .3)+
  geom_smooth(method="lm")+
  theme_classic()+
  labs(x = "Frequency", y = "Kuperman minus picture-naming, controlling for presch./helpf.")

6 Predict Wordbank - picture naming

pr_pn_full_fm <- write_model_formula("wordbank_minus_pn")

pr_pn_full <- lm(pr_pn_full_fm, complete_predictors)
pr_pn_preschoolness <- lm(wordbank_minus_pn ~ preschoolness, complete_predictors)
pr_pn_helpfulness <- lm(wordbank_minus_pn ~ helpfulness, complete_predictors)
pr_pn_frequency <- lm(wordbank_minus_pn ~ childes_adult_log_freq, complete_predictors)
pr_pn_concreteness <- lm(wordbank_minus_pn ~ concreteness, complete_predictors)

tab_model(pr_pn_full, pr_pn_preschoolness, pr_pn_helpfulness, pr_pn_frequency, pr_pn_concreteness)
  wordbank minus pn wordbank minus pn wordbank minus pn wordbank minus pn wordbank minus pn
Predictors Estimates CI p Estimates CI p Estimates CI p Estimates CI p Estimates CI p
(Intercept) -15.69 -27.37 – -4.01 0.009 -1.11 -2.06 – -0.16 0.022 0.04 -1.50 – 1.57 0.961 -5.26 -6.94 – -3.58 <0.001 -11.95 -25.30 – 1.40 0.079
preschoolness -0.44 -0.80 – -0.08 0.016 -0.08 -0.48 – 0.31 0.679
helpfulness -0.63 -1.04 – -0.22 0.003 -0.41 -0.87 – 0.05 0.082
childes_adult_log_freq 0.69 0.46 – 0.93 <0.001 0.55 0.32 – 0.79 <0.001
concreteness 2.55 0.17 – 4.94 0.036 2.17 -0.55 – 4.90 0.117
Observations 115 115 115 115 115
R2 / R2 adjusted 0.281 / 0.254 0.002 / -0.007 0.027 / 0.018 0.164 / 0.157 0.022 / 0.013
# run models to produce residuals, to then correlate with missing predictor. e.g. pr_pn_no_helpfulness will let us correlate helpfulness with pn_minus_parentreport, controlling for frequency & concreteness

#did not run preschoolness model because wasn't significant in full model above
#pr_pn_no_preschoolness <- lm(pn_minus_parentreport ~ concreteness + helpfulness + childes_adult_log_freq, complete_predictors)
pr_pn_no_helpfulness <- lm(wordbank_minus_pn ~ childes_adult_log_freq, complete_predictors)
pr_pn_no_frequency <- lm(wordbank_minus_pn ~ helpfulness, complete_predictors)
#pr_pn_no_concreteness <- lm(wordbank_minus_pn ~ helpfulness + childes_adult_log_freq, complete_predictors)

pr_pn_predictors <- complete_predictors %>% 
  select(num_item_id, word, morrison_aoa_threshold_years, wordbank_aoa_years, parent_minus_pn, preschoolness, 
         helpfulness, concreteness, childes_adult_log_freq) %>% 
  mutate(pr_pn_no_helpfulness.resid = pr_pn_no_helpfulness$resid,
         pr_pn_no_frequency.resid = pr_pn_no_frequency$resid)

6.1 Plot Wordbank - picture-naming predictors individually

6.1.1 Helpfulness (r = -.24)

ggplot(pr_pn_predictors, aes(x = helpfulness, y = pr_pn_no_helpfulness.resid, label=word))+
  geom_point()+
  geom_text_repel(size = 3, alpha = .6, segment.alpha = .3)+
  geom_smooth(method="lm")+
  theme_classic()+
  labs(x = "Helpfulness", y = "Wordbank minus Picture-naming, controlling for freq.")

6.1.2 Frequency (r = .44)

ggplot(pr_pn_predictors, aes(x = childes_adult_log_freq, y = pr_pn_no_frequency.resid, label=word))+
  geom_point()+
  geom_text_repel(size = 3, alpha = .6, segment.alpha = .3)+
  geom_smooth(method="lm")+
  theme_classic()+
  labs(x = "Frequency", y = "Wordbank minus Picture-naming, controlling for helpf.")

7 Compare strength of predictors for different outcomes

7.1 Raw AoA values

kuperman_aoa <- lm(KupermanAoA ~ preschoolness + concreteness + helpfulness + childes_adult_log_freq, data = complete_predictors)
kuperman_aoa_plot <- tidy(kuperman_aoa) %>% 
  mutate(model = "Kuperman AoA, R2 = .33")

pn_aoa <- lm(morrison_aoa_threshold_years ~ preschoolness + concreteness + helpfulness + childes_adult_log_freq, data = complete_predictors)
pn_aoa_plot <- tidy(pn_aoa) %>% 
  mutate(model = "Picture-naming AoA, R2 = .34")

pr_aoa <- lm(wordbank_aoa_years ~ preschoolness + concreteness + helpfulness + childes_adult_log_freq, data = complete_predictors)
pr_aoa_plot <- tidy(pr_aoa) %>% 
  mutate(model = "Wordbank AoA, R2 = .55")

raw_plot <- bind_rows(kuperman_aoa_plot, pn_aoa_plot, pr_aoa_plot)
dwplot(raw_plot,
       vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2)) %>%
    relabel_predictors(childes_adult_log_freq = "frequency") +
     theme_bw() + xlab("Coefficient Estimate") + ylab("") +
     geom_vline(xintercept = 0, colour = "grey60", linetype = 2) +
     ggtitle("Predicting AoA estimates (N = 115)") +
     theme(plot.title = element_text(face="bold"),
           legend.position = c(0.007, 0.01),
           legend.justification = c(0, 0), 
           legend.background = element_rect(colour="grey80"),
           legend.title = element_blank())

7.2 Raw differences between AoAs

k_pn_plot <- tidy(k_pn_full) %>% 
  mutate(model = "kup - picture, R2 = .23") %>% 
  arrange(term)

k_pr_plot <- tidy(k_pr_full) %>% 
  mutate(model = "kup - wordbank, R2 = .17") %>% 
  arrange(term)

pr_pn_plot <- tidy(pr_pn_full) %>% 
  mutate(model = "wordbank - picture, R2 = .25") %>% 
  arrange(term)

differences_plot <- bind_rows(k_pn_plot, k_pr_plot, pr_pn_plot)

dwplot(differences_plot,
       vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2)) %>%
    relabel_predictors(childes_adult_log_freq = "frequency") +
     theme_bw() + xlab("Coefficient Estimate") + ylab("") +
     geom_vline(xintercept = 0, colour = "grey60", linetype = 2) +
     ggtitle("Predicting AoA differences (N = 115)") +
     theme(plot.title = element_text(face="bold"),
           legend.position = c(0.65, 0.01),
           legend.justification = c(0, 0), 
           legend.background = element_rect(colour="grey80"),
           legend.title = element_blank())