1 Zero-order correlations among predictors & outcomes
2 Get a sense of what words are most different for each measure
3 Get a sense of difference distributions
4 Predict Kuperman-Wordbank
- 4.1 Plot Kuperman-Wordbank predictors individually
  - 4.1.1 Preschoolness (r = -.28)
  - 4.1.2 Frequency (r = -.23)
5 Predict Kuperman - Picture-naming
- 5.1 Plot Kuperman - Picture naming predictors individually
6 Predict Wordbank - picture naming
- 6.1 Plot Wordbank - picture-naming predictors individually
  - 6.1.1 Helpfulness (r = -.24)
  - 6.1.2 Frequency (r = .44)
7 Compare strength of predictors for different outcomes
- 7.1 Raw AoA values
- 7.2 Raw differences between AoAs

# Load the per-word predictor table and derive the Kuperman-minus-Wordbank
# AoA difference that is used as an outcome further down.
complete_predictors <- mutate(
  read_csv("complete_predictors.csv"),
  kuperman_minus_wordbank = KupermanAoA - wordbank_aoa_years
)

# Predictors that go into every "full" model, in formula order.
full_predictor_list <- c(
  "preschoolness",
  "helpfulness",
  "childes_adult_log_freq",
  "concreteness"
)

#' Build the model formula `outcome ~ predictor_1 + predictor_2 + ...`
#'
#' @param outcome_var Name of the outcome column (coerced with
#'   `as.character()`; must be a single value).
#' @param predictors Character vector of predictor column names. Defaults to
#'   the file-level `full_predictor_list`, so existing one-argument calls
#'   behave as before.
#' @return A formula object, returned visibly.
write_model_formula <- function(outcome_var, predictors = full_predictor_list) {
  outcome_var <- as.character(outcome_var)
  stopifnot(
    length(outcome_var) == 1L,
    is.character(predictors),
    length(predictors) > 0L
  )
  # reformulate() assembles the formula from its parts, which avoids
  # hand-pasting a string and parsing it back. Making it the last expression
  # (rather than an assignment) means the result is not returned invisibly.
  reformulate(predictors, response = outcome_var)
}

1 Zero-order correlations among predictors & outcomes

# Columns that enter the zero-order correlation matrix; several are renamed
# here so the plot shows shorter labels.
data_forcorr <- select(
  complete_predictors,
  KupermanAoA,
  wordbank_aoa = wordbank_aoa_years,
  picture_naming_aoa = morrison_aoa_threshold_years,
  babiness,
  preschoolness,
  helpfulness,
  frequency = childes_adult_log_freq,
  concreteness,
  generality = mean_generality
)

# Pearson correlations on pairwise-complete observations, plus the matching
# matrix of p-values from cor.mtest().
corrs <- cor(data_forcorr, method = "pearson", use = "pairwise.complete.obs")
pmat <- cor.mtest(data_forcorr, method = "pearson")
pval <- pmat$p

# Lower-triangle heat map with coefficients printed in the cells; cells whose
# p-value is above sig.level are left blank.
corrplot(
  corrs,
  method = "color",
  type = "lower",
  diag = FALSE,
  addCoef.col = TRUE,
  number.cex = .7,
  tl.col = "black",
  p.mat = pval,
  sig.level = .05,
  insig = "blank"
)

Note: although generality is correlated with other variables, it wasn’t a significant predictor in any of the models tested (raw AoAs or differences between AoA measures), so it is not included in any further analyses here.

2 Get a sense of what words are most different for each measure

2.1 Wordbank/Kuperman

# Kuperman AoA against Wordbank AoA with every point labelled by its word.
# The identity line (slope 1, intercept 0) marks where both measures agree.
ggplot(
  data = complete_predictors,
  mapping = aes(wordbank_aoa_years, KupermanAoA, label = word)
) +
  geom_point() +
  geom_text_repel(size = 3, segment.alpha = .3) +
  geom_abline(intercept = 0, slope = 1) +
  lims(x = c(0, 6), y = c(0, 10)) +
  labs(x = "Wordbank AoA") +
  theme_classic()

2.2 Picture-naming/Kuperman

# Kuperman AoA against picture-naming AoA with every point labelled by its
# word. The identity line marks where both measures agree.
ggplot(
  data = complete_predictors,
  mapping = aes(morrison_aoa_threshold_years, KupermanAoA, label = word)
) +
  geom_point() +
  geom_text_repel(size = 3, segment.alpha = .3) +
  geom_abline(intercept = 0, slope = 1) +
  lims(x = c(0, 10), y = c(0, 10)) +
  labs(x = "Picture-naming AoA") +
  theme_classic()

2.3 Picture-naming/Wordbank

# Wordbank AoA against picture-naming AoA with every point labelled by its
# word. The identity line marks where both measures agree.
ggplot(complete_predictors, aes(x = morrison_aoa_threshold_years, y = wordbank_aoa_years, label = word)) +
  geom_point() +
  geom_text_repel(segment.alpha = .3, size = 3) +
  geom_abline(slope = 1, intercept = 0) +
  theme_classic() +
  # Axis limits follow the variable on each axis, as in the two plots above:
  # picture-naming AoA spans 0-10 and Wordbank AoA spans 0-6. With the limits
  # the other way round, points with picture-naming AoA above 6 were dropped.
  xlim(0, 10) +
  ylim(0, 6) +
  labs(x = "Picture-naming AoA", y = "Wordbank AoA")

3 Get a sense of difference distributions

3.1 Wordbank/Kuperman

# Histogram of the Kuperman-minus-Wordbank AoA difference (one value per row
# of complete_predictors).
hist(complete_predictors$kuperman_minus_wordbank)

3.2 Kuperman/picture-naming

# Histogram of the Kuperman-minus-picture-naming AoA difference.
# NOTE(review): kuperman_minus_pn is not derived in this script; presumably it
# is already a column of complete_predictors.csv - confirm.
hist(complete_predictors$kuperman_minus_pn)

3.3 Wordbank/picture-naming

# Histogram of the Wordbank-minus-picture-naming AoA difference.
# NOTE(review): wordbank_minus_pn is not derived in this script; presumably it
# is already a column of complete_predictors.csv - confirm.
hist(complete_predictors$wordbank_minus_pn)

4 Predict Kuperman-Wordbank

# Full model: every predictor from write_model_formula() entered together.
k_pr_full_fm <- write_model_formula("kuperman_minus_wordbank")
k_pr_full <- lm(formula = k_pr_full_fm, data = complete_predictors)

# Single-predictor models, one per predictor, for comparison with the full
# model.
k_pr_preschoolness <- lm(
  formula = kuperman_minus_wordbank ~ preschoolness,
  data = complete_predictors
)
k_pr_helpfulness <- lm(
  formula = kuperman_minus_wordbank ~ helpfulness,
  data = complete_predictors
)
k_pr_frequency <- lm(
  formula = kuperman_minus_wordbank ~ childes_adult_log_freq,
  data = complete_predictors
)
k_pr_concreteness <- lm(
  formula = kuperman_minus_wordbank ~ concreteness,
  data = complete_predictors
)

# Side-by-side regression table: full model first, then each single-predictor
# model.
tab_model(
  k_pr_full,
  k_pr_preschoolness,
  k_pr_helpfulness,
  k_pr_frequency,
  k_pr_concreteness
)

	kuperman minus wordbank			kuperman minus wordbank			kuperman minus wordbank			kuperman minus wordbank			kuperman minus wordbank
Predictors	Estimates	CI	p	Estimates	CI	p	Estimates	CI	p	Estimates	CI	p	Estimates	CI	p
(Intercept)	2.31	-3.91 – 8.53	0.464	3.21	2.77 – 3.66	<0.001	2.79	2.01 – 3.57	<0.001	3.94	3.07 – 4.82	<0.001	1.23	-5.59 – 8.05	0.721
preschoolness	-0.32	-0.52 – -0.13	0.001	-0.40	-0.59 – -0.22	<0.001
helpfulness	-0.11	-0.33 – 0.11	0.330				-0.15	-0.39 – 0.08	0.195
childes_adult_log_freq	-0.15	-0.28 – -0.03	0.016							-0.23	-0.35 – -0.11	<0.001
concreteness	0.44	-0.83 – 1.72	0.490										0.21	-1.18 – 1.61	0.761
Observations	115			115			115			115			115
R² / R² adjusted	0.202 / 0.173			0.142 / 0.135			0.015 / 0.006			0.113 / 0.105			0.001 / -0.008

# Fit reduced models whose residuals are then plotted against the predictor
# that was left out. E.g. k_pr_no_preschoolness regresses
# kuperman_minus_wordbank on frequency only, so its residuals show what is
# left for preschoolness once frequency is controlled for.
k_pr_no_preschoolness <- lm(kuperman_minus_wordbank ~ childes_adult_log_freq, complete_predictors)
# k_pr_no_helpfulness <- lm(kuperman_minus_wordbank ~ concreteness + preschoolness + childes_adult_log_freq, complete_predictors)
k_pr_no_frequency <- lm(kuperman_minus_wordbank ~ preschoolness, complete_predictors)
# k_pr_no_concreteness <- lm(kuperman_minus_wordbank ~ preschoolness + helpfulness + childes_adult_log_freq, complete_predictors)

# Per-word table for the residual plots. residuals() is the documented
# accessor; it replaces `$resid`, which only resolves through `$` partial
# matching of the `residuals` element.
# NOTE(review): assumes lm() dropped no rows for missing values, so the
# residual vectors line up row-for-row with complete_predictors - confirm.
k_pr_predictors <- complete_predictors %>%
  select(num_item_id, word, KupermanAoA, parentreport_calculated_aoa_years, kuperman_minus_wordbank, preschoolness,
         helpfulness, concreteness, childes_adult_log_freq) %>%
  mutate(k_pr_no_preschoolness.resid = residuals(k_pr_no_preschoolness),
         k_pr_no_frequency.resid = residuals(k_pr_no_frequency))

4.1 Plot Kuperman-Wordbank predictors individually

4.1.1 Preschoolness (r = -.28)

# Preschoolness against the residuals of the frequency-only model, with a
# fitted linear trend and every point labelled by its word.
ggplot(
  data = k_pr_predictors,
  mapping = aes(preschoolness, k_pr_no_preschoolness.resid, label = word)
) +
  geom_point() +
  geom_text_repel(alpha = .6, segment.alpha = .3, size = 3) +
  geom_smooth(method = "lm") +
  labs(
    x = "Preschoolness",
    y = "Kuperman minus Wordbank, controlling for freq."
  ) +
  theme_classic()

4.1.2 Frequency (r = -.23)

# Frequency against the residuals of the preschoolness-only model, with a
# fitted linear trend and every point labelled by its word.
ggplot(
  data = k_pr_predictors,
  mapping = aes(childes_adult_log_freq, k_pr_no_frequency.resid, label = word)
) +
  geom_point() +
  geom_text_repel(alpha = .6, segment.alpha = .3, size = 3) +
  geom_smooth(method = "lm") +
  labs(
    x = "Frequency",
    y = "Kuperman minus Wordbank, controlling for presch."
  ) +
  theme_classic()

5 Predict Kuperman - Picture-naming

# Full model: every predictor from write_model_formula() entered together.
k_pn_full_fm <- write_model_formula("kuperman_minus_pn")
k_pn_full <- lm(formula = k_pn_full_fm, data = complete_predictors)

# Single-predictor models, one per predictor, for comparison with the full
# model.
k_pn_preschoolness <- lm(
  formula = kuperman_minus_pn ~ preschoolness,
  data = complete_predictors
)
k_pn_helpfulness <- lm(
  formula = kuperman_minus_pn ~ helpfulness,
  data = complete_predictors
)
k_pn_frequency <- lm(
  formula = kuperman_minus_pn ~ childes_adult_log_freq,
  data = complete_predictors
)
k_pn_concreteness <- lm(
  formula = kuperman_minus_pn ~ concreteness,
  data = complete_predictors
)

# Side-by-side regression table: full model first, then each single-predictor
# model.
tab_model(
  k_pn_full,
  k_pn_preschoolness,
  k_pn_helpfulness,
  k_pn_frequency,
  k_pn_concreteness
)

	kuperman minus pn			kuperman minus pn			kuperman minus pn			kuperman minus pn			kuperman minus pn
Predictors	Estimates	CI	p	Estimates	CI	p	Estimates	CI	p	Estimates	CI	p	Estimates	CI	p
(Intercept)	-13.38	-25.61 – -1.15	0.032	2.10	1.14 – 3.05	<0.001	2.83	1.26 – 4.40	0.001	-1.32	-3.16 – 0.53	0.161	-10.72	-24.46 – 3.03	0.125
preschoolness	-0.77	-1.15 – -0.39	<0.001	-0.49	-0.88 – -0.09	0.017
helpfulness	-0.73	-1.16 – -0.31	0.001				-0.56	-1.03 – -0.09	0.019
childes_adult_log_freq	0.54	0.29 – 0.79	<0.001							0.32	0.07 – 0.58	0.014
concreteness	3.00	0.50 – 5.50	0.019										2.39	-0.42 – 5.20	0.095
Observations	115			115			115			115			115
R² / R² adjusted	0.258 / 0.232			0.050 / 0.041			0.047 / 0.039			0.052 / 0.044			0.025 / 0.016

# Fit reduced models whose residuals are then plotted against the predictor
# that was left out. E.g. k_pn_no_preschoolness regresses kuperman_minus_pn on
# helpfulness and frequency, so its residuals show what is left for
# preschoolness once those two are controlled for.
k_pn_no_preschoolness <- lm(kuperman_minus_pn ~ helpfulness + childes_adult_log_freq, complete_predictors)
k_pn_no_helpfulness <- lm(kuperman_minus_pn ~ preschoolness + childes_adult_log_freq, complete_predictors)
k_pn_no_frequency <- lm(kuperman_minus_pn ~ helpfulness + preschoolness, complete_predictors)

# will not run this model for this DV because it wasn't predictive in the full model - not enough variability
# NOTE(review): the rendered table for k_pn_full lists concreteness at
# p = 0.019, which does not match "wasn't predictive" - confirm the rationale.
#k_pn_no_concreteness <- lm(kuperman_minus_pn ~ preschoolness + helpfulness + childes_adult_log_freq, complete_predictors)

# Per-word table for the residual plots. residuals() is the documented
# accessor; it replaces `$resid`, which only resolves through `$` partial
# matching of the `residuals` element.
# NOTE(review): assumes lm() dropped no rows for missing values, so the
# residual vectors line up row-for-row with complete_predictors - confirm.
k_pn_predictors <- complete_predictors %>%
  select(num_item_id, word, KupermanAoA, morrison_aoa_threshold_years, kuperman_minus_pn, preschoolness,
         helpfulness, concreteness, childes_adult_log_freq) %>%
  mutate(k_pn_no_preschoolness.resid = residuals(k_pn_no_preschoolness),
         k_pn_no_helpfulness.resid = residuals(k_pn_no_helpfulness),
         k_pn_no_frequency.resid = residuals(k_pn_no_frequency))

5.1 Plot Kuperman - Picture naming predictors individually

5.1.1 Preschoolness (r = -.24)

# Preschoolness against the residuals of the model that omits it, with a
# fitted linear trend and every point labelled by its word.
ggplot(
  data = k_pn_predictors,
  mapping = aes(preschoolness, k_pn_no_preschoolness.resid, label = word)
) +
  geom_point() +
  geom_text_repel(alpha = .6, segment.alpha = .3, size = 3) +
  geom_smooth(method = "lm") +
  labs(
    x = "Preschoolness",
    y = "Kuperman minus picture-naming, controlling for freq./helpf."
  ) +
  theme_classic()

5.1.2 Helpfulness (r = -.19)

# Helpfulness against the residuals of the model that omits it, with a fitted
# linear trend and every point labelled by its word.
ggplot(
  data = k_pn_predictors,
  mapping = aes(helpfulness, k_pn_no_helpfulness.resid, label = word)
) +
  geom_point() +
  geom_text_repel(alpha = .6, segment.alpha = .3, size = 3) +
  geom_smooth(method = "lm") +
  labs(
    x = "Helpfulness",
    y = "Kuperman minus picture-naming, controlling for freq./presch."
  ) +
  theme_classic()

5.1.3 Frequency (r = .34)

# Frequency against the residuals of the model that omits it, with a fitted
# linear trend and every point labelled by its word.
ggplot(
  data = k_pn_predictors,
  mapping = aes(childes_adult_log_freq, k_pn_no_frequency.resid, label = word)
) +
  geom_point() +
  geom_text_repel(alpha = .6, segment.alpha = .3, size = 3) +
  geom_smooth(method = "lm") +
  labs(
    x = "Frequency",
    y = "Kuperman minus picture-naming, controlling for presch./helpf."
  ) +
  theme_classic()

6 Predict Wordbank - picture naming

# Full model: every predictor from write_model_formula() entered together.
pr_pn_full_fm <- write_model_formula("wordbank_minus_pn")
pr_pn_full <- lm(formula = pr_pn_full_fm, data = complete_predictors)

# Single-predictor models, one per predictor, for comparison with the full
# model.
pr_pn_preschoolness <- lm(
  formula = wordbank_minus_pn ~ preschoolness,
  data = complete_predictors
)
pr_pn_helpfulness <- lm(
  formula = wordbank_minus_pn ~ helpfulness,
  data = complete_predictors
)
pr_pn_frequency <- lm(
  formula = wordbank_minus_pn ~ childes_adult_log_freq,
  data = complete_predictors
)
pr_pn_concreteness <- lm(
  formula = wordbank_minus_pn ~ concreteness,
  data = complete_predictors
)

# Side-by-side regression table: full model first, then each single-predictor
# model.
tab_model(
  pr_pn_full,
  pr_pn_preschoolness,
  pr_pn_helpfulness,
  pr_pn_frequency,
  pr_pn_concreteness
)

	wordbank minus pn			wordbank minus pn			wordbank minus pn			wordbank minus pn			wordbank minus pn
Predictors	Estimates	CI	p	Estimates	CI	p	Estimates	CI	p	Estimates	CI	p	Estimates	CI	p
(Intercept)	-15.69	-27.37 – -4.01	0.009	-1.11	-2.06 – -0.16	0.022	0.04	-1.50 – 1.57	0.961	-5.26	-6.94 – -3.58	<0.001	-11.95	-25.30 – 1.40	0.079
preschoolness	-0.44	-0.80 – -0.08	0.016	-0.08	-0.48 – 0.31	0.679
helpfulness	-0.63	-1.04 – -0.22	0.003				-0.41	-0.87 – 0.05	0.082
childes_adult_log_freq	0.69	0.46 – 0.93	<0.001							0.55	0.32 – 0.79	<0.001
concreteness	2.55	0.17 – 4.94	0.036										2.17	-0.55 – 4.90	0.117
Observations	115			115			115			115			115
R² / R² adjusted	0.281 / 0.254			0.002 / -0.007			0.027 / 0.018			0.164 / 0.157			0.022 / 0.013

# Fit reduced models whose residuals are then plotted against the predictor
# that was left out. E.g. pr_pn_no_helpfulness regresses wordbank_minus_pn on
# frequency only, so its residuals show what is left for helpfulness once
# frequency is controlled for.

#did not run preschoolness model because wasn't significant in full model above
# NOTE(review): the rendered table for pr_pn_full lists preschoolness at
# p = 0.016 (it is non-significant only in the single-predictor model) -
# confirm the rationale.
#pr_pn_no_preschoolness <- lm(pn_minus_parentreport ~ concreteness + helpfulness + childes_adult_log_freq, complete_predictors)
pr_pn_no_helpfulness <- lm(wordbank_minus_pn ~ childes_adult_log_freq, complete_predictors)
pr_pn_no_frequency <- lm(wordbank_minus_pn ~ helpfulness, complete_predictors)
#pr_pn_no_concreteness <- lm(wordbank_minus_pn ~ helpfulness + childes_adult_log_freq, complete_predictors)

# Per-word table for the residual plots. wordbank_minus_pn (the outcome that
# is actually modelled here) is now carried along, matching the other two
# *_predictors tables; parent_minus_pn is kept so existing consumers still
# find it.
# NOTE(review): parent_minus_pn is presumably an older name for the same
# difference - confirm, and drop one of the two if so.
# residuals() is the documented accessor; it replaces `$resid`, which only
# resolves through `$` partial matching of the `residuals` element.
# NOTE(review): assumes lm() dropped no rows for missing values, so the
# residual vectors line up row-for-row with complete_predictors - confirm.
pr_pn_predictors <- complete_predictors %>%
  select(num_item_id, word, morrison_aoa_threshold_years, wordbank_aoa_years, wordbank_minus_pn, parent_minus_pn,
         preschoolness, helpfulness, concreteness, childes_adult_log_freq) %>%
  mutate(pr_pn_no_helpfulness.resid = residuals(pr_pn_no_helpfulness),
         pr_pn_no_frequency.resid = residuals(pr_pn_no_frequency))

6.1 Plot Wordbank - picture-naming predictors individually

6.1.1 Helpfulness (r = -.24)

# Helpfulness against the residuals of the frequency-only model, with a fitted
# linear trend and every point labelled by its word.
ggplot(
  data = pr_pn_predictors,
  mapping = aes(helpfulness, pr_pn_no_helpfulness.resid, label = word)
) +
  geom_point() +
  geom_text_repel(alpha = .6, segment.alpha = .3, size = 3) +
  geom_smooth(method = "lm") +
  labs(
    x = "Helpfulness",
    y = "Wordbank minus Picture-naming, controlling for freq."
  ) +
  theme_classic()

6.1.2 Frequency (r = .44)

# Frequency against the residuals of the helpfulness-only model, with a fitted
# linear trend and every point labelled by its word.
ggplot(
  data = pr_pn_predictors,
  mapping = aes(childes_adult_log_freq, pr_pn_no_frequency.resid, label = word)
) +
  geom_point() +
  geom_text_repel(alpha = .6, segment.alpha = .3, size = 3) +
  geom_smooth(method = "lm") +
  labs(
    x = "Frequency",
    y = "Wordbank minus Picture-naming, controlling for helpf."
  ) +
  theme_classic()

7 Compare strength of predictors for different outcomes

7.1 Raw AoA values

# Regress each raw AoA measure on the same four predictors, tidy the
# coefficients, and tag each set with a display label for the plot legend.
# The R2 values inside the labels are typed by hand, not computed here.
kuperman_aoa <- lm(
  formula = KupermanAoA ~ preschoolness + concreteness + helpfulness + childes_adult_log_freq,
  data = complete_predictors
)
kuperman_aoa_plot <- mutate(tidy(kuperman_aoa), model = "Kuperman AoA, R2 = .33")

pn_aoa <- lm(
  formula = morrison_aoa_threshold_years ~ preschoolness + concreteness + helpfulness + childes_adult_log_freq,
  data = complete_predictors
)
pn_aoa_plot <- mutate(tidy(pn_aoa), model = "Picture-naming AoA, R2 = .34")

pr_aoa <- lm(
  formula = wordbank_aoa_years ~ preschoolness + concreteness + helpfulness + childes_adult_log_freq,
  data = complete_predictors
)
pr_aoa_plot <- mutate(tidy(pr_aoa), model = "Wordbank AoA, R2 = .55")

# Stack the three coefficient tables; dwplot() separates them by `model`.
raw_plot <- bind_rows(kuperman_aoa_plot, pn_aoa_plot, pr_aoa_plot)

# Dot-and-whisker plot of the coefficients with a dashed reference line at
# zero and the legend placed in the lower-left corner.
dwplot(
  raw_plot,
  vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2)
) %>%
  relabel_predictors(childes_adult_log_freq = "frequency") +
  theme_bw() +
  geom_vline(xintercept = 0, colour = "grey60", linetype = 2) +
  labs(
    x = "Coefficient Estimate",
    y = "",
    title = "Predicting AoA estimates (N = 115)"
  ) +
  theme(
    plot.title = element_text(face = "bold"),
    legend.position = c(0.007, 0.01),
    legend.justification = c(0, 0),
    legend.background = element_rect(colour = "grey80"),
    legend.title = element_blank()
  )

7.2 Raw differences between AoAs

# Tidy the three full difference models, tag each with a display label, and
# sort the rows by term. The R2 values inside the labels are typed by hand,
# not computed here.
k_pn_plot <- tidy(k_pn_full)
k_pn_plot <- mutate(k_pn_plot, model = "kup - picture, R2 = .23")
k_pn_plot <- arrange(k_pn_plot, term)

k_pr_plot <- tidy(k_pr_full)
k_pr_plot <- mutate(k_pr_plot, model = "kup - wordbank, R2 = .17")
k_pr_plot <- arrange(k_pr_plot, term)

pr_pn_plot <- tidy(pr_pn_full)
pr_pn_plot <- mutate(pr_pn_plot, model = "wordbank - picture, R2 = .25")
pr_pn_plot <- arrange(pr_pn_plot, term)

# Stack the three coefficient tables; dwplot() separates them by `model`.
differences_plot <- bind_rows(k_pn_plot, k_pr_plot, pr_pn_plot)

# Dot-and-whisker plot of the coefficients with a dashed reference line at
# zero and the legend placed towards the lower right.
dwplot(
  differences_plot,
  vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2)
) %>%
  relabel_predictors(childes_adult_log_freq = "frequency") +
  theme_bw() +
  geom_vline(xintercept = 0, colour = "grey60", linetype = 2) +
  labs(
    x = "Coefficient Estimate",
    y = "",
    title = "Predicting AoA differences (N = 115)"
  ) +
  theme(
    plot.title = element_text(face = "bold"),
    legend.position = c(0.65, 0.01),
    legend.justification = c(0, 0),
    legend.background = element_rect(colour = "grey80"),
    legend.title = element_blank()
  )

Visualizations for AoA project - raw differences between AoA estimates

Christina

2020-08-06