# Per-word generality ratings: drop the X1 and num_item_id_wrong columns and
# rename `word` to `word_generality` before joining onto the main table.
generality <- read_csv("generality_ratings_byWord_combined.csv") %>%
  select(-X1, -num_item_id_wrong) %>%
  rename(word_generality = word)

# Main predictor table with generality attached, plus two AoA difference
# scores relative to the picture-naming (Morrison) threshold AoA.
data <- read_csv("all_variables_for_aoa_regressions_clean.csv") %>%
  left_join(generality, by = "num_item_id") %>%
  mutate(
    kuperman_minus_pn = KupermanAoA - morrison_aoa_threshold_years,
    parent_minus_pn = parentreport_calculated_aoa_years - morrison_aoa_threshold_years
  )

# Rows that have a generality rating.
complete_generality <- filter(data, !is.na(mean_generality))

### left childes out as a filter here because when it's included, that only leaves us with 23 words
# Rows that have a picture-naming AoA.
complete_predictors_nowordbank <- filter(data, !is.na(morrison_aoa_threshold_years))
# word info: where are we losing things?
# picture-naming: have for 128 words - but there are 300 originally tested. It turns out only 128 words overlap with the parent-report measures (10 of our words + 118 on CDI)
# get CDI threshold info (e.g. Thill & Twomey 2016) to look at parent-report AoA that way
# Threshold AoA from Wordbank: the youngest age at which at least 50% of
# children are reported to produce the word.
wordbank_items <- get_item_data(language = "English (American)", form = "WS")
wordbank_threshold_aoa <- summarise_items(wordbank_items) %>%
  group_by(uni_lemma) %>%
  filter(production >= .5) %>%
  # Keep the YOUNGEST qualifying age. Selecting on min(production) only gives
  # the same row when production rises monotonically with age, which noisy
  # cross-sectional proportions do not guarantee.
  filter(age == min(age)) %>%
  ungroup() %>%
  cSplit(., "item_id", sep = "_") %>%
  # item_id_2 is the numeric suffix of item_id produced by cSplit; the +1
  # offset and character type are kept from the original so the join key
  # matches num_item_id in the predictor table.
  # NOTE(review): grouping is by uni_lemma, not item_id -- items sharing a
  # uni_lemma (or with a missing one) are thresholded together; confirm intended.
  mutate(num_item_id = as.character(item_id_2 + 1)) %>%
  select(
    num_item_id,
    uni_lemma,
    wordbank_threshold_age = age,
    n_wordbank_children = n_children,
    wordbank_pct_producing_at_aoa = production
  )
# NOTE(review): results/labels printed further down were generated with the
# previous selection rule and should be re-knit after this change.
# Naive-adult AoA: per word, the youngest tested age (in months) at which at
# least half of raters said the child would say the word, converted to years.
naive_sona <- read_csv("../6_naive_adult_cdi/naive_sona_aoas.csv") %>%
  group_by(word) %>%
  filter(prop_say >= .5) %>%
  filter(age_tested == min(age_tested)) %>%
  mutate(sona_aoa = age_tested / 12) %>%
  select(word, sona_aoa)
# Analysis table: picture-naming words joined with Wordbank and naive-adult
# AoAs, restricted to words that have a Wordbank threshold age, plus pairwise
# AoA difference scores and mean-centered predictors.
complete_predictors <- complete_predictors_nowordbank %>%
  left_join(wordbank_threshold_aoa, by = "num_item_id") %>%
  left_join(naive_sona, by = "word") %>%
  # Words with no naive-adult AoA (never reached the 50% criterion) get 4 years.
  mutate(sona_aoa_years = coalesce(sona_aoa, 4)) %>%
  select(-sona_aoa) %>%
  filter(!is.na(wordbank_threshold_age)) %>%
  mutate(
    wordbank_aoa_years = wordbank_threshold_age / 12,
    wordbank_minus_pn = wordbank_aoa_years - morrison_aoa_threshold_years,
    wordbank_minus_kuperman = wordbank_aoa_years - KupermanAoA,
    wordbank_minus_sona = wordbank_aoa_years - sona_aoa_years,
    sona_minus_pn = sona_aoa_years - morrison_aoa_threshold_years,
    sona_minus_kuperman = sona_aoa_years - KupermanAoA,
    # Center by subtracting the mean directly: scale() would store a
    # one-column matrix (with attributes) in each column instead of a plain
    # numeric vector. The centered values are numerically the same.
    concreteness_centered = concreteness - mean(concreteness, na.rm = TRUE),
    preschoolness_centered = preschoolness - mean(preschoolness, na.rm = TRUE),
    frequency_centered = childes_adult_log_freq - mean(childes_adult_log_freq, na.rm = TRUE),
    helpfulness_centered = helpfulness - mean(helpfulness, na.rm = TRUE)
  )
#13 words don't meet 50% production criteria by 30 months
#write csv for loading into viz script
#write_csv(complete_predictors, "complete_predictors.csv")
# Word-level predictors used in the regressions (raw and mean-centered names).
full_predictor_list <- c("preschoolness", "helpfulness", "childes_adult_log_freq", "concreteness")
centered_predictor_list <- c(
  "preschoolness_centered", "helpfulness_centered",
  "concreteness_centered", "frequency_centered"
)

#' Build an additive model formula for a given outcome.
#'
#' @param outcome_var Name of the outcome column (string).
#' @param predictors Character vector of predictor column names; defaults to
#'   `full_predictor_list`, which is what the original implementation always used.
#' @return A formula of the form `outcome_var ~ p1 + p2 + ...`, returned visibly.
write_model_formula <- function(outcome_var, predictors = full_predictor_list) {
  rhs <- paste(predictors, collapse = " + ")
  as.formula(paste(as.character(outcome_var), "~", rhs))
}
Codebook:
wordbank_thresh_aoa: AoA, determined by youngest age in Wordbank (CDI) at which 50% of kids produce word
KupermanAoA: AoA from Kuperman norms (adult retrospective self-report, unconstrained)
picture_naming_aoa: AoA from Morrison et al. (1997), who had children name pictures. AoA is the youngest age at which 75% of children named the picture correctly. The youngest age tested was 3 years, so AoAs below 3 are extrapolated from CDI norms (a known limitation of this measure).
naive_adult_aoa: AoA from a survey of college students (N=36 so far). Participants were randomized to complete the CDI checklist for either an 18-month-old or a 24-month-old. AoA is the youngest age at which 50% of raters said the child would produce the word, so possible values are 1.5 or 2 years. For words that didn’t reach the 50% threshold, AoA was set to 4 years.
babiness and preschoolness: on a scale of 1-5, how much is the word associated with babies/preschoolers (MTurk)
helpfulness: on a scale of 1-5, how helpful would it be for a preschooler to know this word (MTurk)
frequency: log frequency based on adult speech in CHILDES
concreteness: concreteness norms from Brysbaert et al. - adults asked to rate on a scale of 1-5
generality: on a scale of 1-5, how general is this word (MTurk)
# Pairwise Pearson correlations among the AoA measures and word-level
# predictors, using the codebook names as column labels.
data_forcorr <- complete_predictors %>%
  select(
    KupermanAoA,
    wordbank_thresh_aoa = wordbank_aoa_years,
    picture_naming_aoa = morrison_aoa_threshold_years,
    naive_adult_aoa = sona_aoa_years,
    babiness,
    preschoolness,
    helpfulness,
    frequency = childes_adult_log_freq,
    concreteness,
    generality = mean_generality
  )

corrs <- cor(data_forcorr, use = "pairwise.complete.obs", method = "pearson")
pmat <- cor.mtest(data_forcorr, method = "pearson")
pval <- pmat$p

# Lower triangle only; cells with p >= .05 are left blank.
corrplot(
  corrs,
  method = "color",
  type = "lower",
  addCoef.col = TRUE,
  tl.col = "black",
  diag = FALSE,
  p.mat = pval,
  sig.level = .05,
  insig = "blank",
  number.cex = .7
)
Note: although generality is correlated with other variables, it wasn’t a significant predictor in any models tested (raw AoAs or differences between different AoA measures), so it’s not included in any further analyses here.
Kuperman and naive adults predict Wordbank, but picture-naming doesn’t.
# Wordbank AoA ~ word-level predictors + Kuperman AoA.
pr_kup <- lm(
  wordbank_aoa_years ~ preschoolness + helpfulness + childes_adult_log_freq +
    concreteness + KupermanAoA,
  data = complete_predictors
)
summary(pr_kup)
##
## Call:
## lm(formula = wordbank_aoa_years ~ preschoolness + helpfulness +
## childes_adult_log_freq + concreteness + KupermanAoA, data = complete_predictors)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.41163 -0.13177 0.00245 0.11739 0.51277
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.294873 0.845642 5.079 1.58e-06 ***
## preschoolness -0.030126 0.027151 -1.110 0.269623
## helpfulness 0.009449 0.029107 0.325 0.746083
## childes_adult_log_freq -0.144526 0.018367 -7.869 2.81e-12 ***
## concreteness -0.352973 0.169164 -2.087 0.039260 *
## KupermanAoA 0.093548 0.023625 3.960 0.000134 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1838 on 109 degrees of freedom
## Multiple R-squared: 0.6205, Adjusted R-squared: 0.6031
## F-statistic: 35.65 on 5 and 109 DF, p-value: < 2.2e-16
# Coefficient table for the Kuperman model, labelled for the dot-whisker plot.
pr_kup_plot <- tidy(pr_kup) %>%
  mutate(model = "kuperman, N = 115, R2 = .6") %>%
  arrange(term)

# Wordbank AoA ~ word-level predictors + picture-naming AoA.
pr_picturename <- lm(
  wordbank_aoa_years ~ preschoolness + helpfulness + childes_adult_log_freq +
    concreteness + morrison_aoa_threshold_years,
  data = complete_predictors
)
summary(pr_picturename)
##
## Call:
## lm(formula = wordbank_aoa_years ~ preschoolness + helpfulness +
## childes_adult_log_freq + concreteness + morrison_aoa_threshold_years,
## data = complete_predictors)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.45553 -0.11640 -0.00363 0.11843 0.46944
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.59665 0.92577 4.965 2.55e-06 ***
## preschoolness -0.07361 0.02762 -2.665 0.00887 **
## helpfulness -0.01219 0.03201 -0.381 0.70412
## childes_adult_log_freq -0.15940 0.02150 -7.415 2.79e-11 ***
## concreteness -0.29029 0.18410 -1.577 0.11775
## morrison_aoa_threshold_years 0.01838 0.01411 1.303 0.19540
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.195 on 109 degrees of freedom
## Multiple R-squared: 0.5726, Adjusted R-squared: 0.553
## F-statistic: 29.2 on 5 and 109 DF, p-value: < 2.2e-16
# Coefficient table for the picture-naming model.
pr_picturename_plot <- tidy(pr_picturename) %>%
  mutate(model = "picture-naming, N = 115, R2 = .55") %>%
  arrange(term)

# Wordbank AoA ~ word-level predictors + naive-adult AoA.
pr_naive <- lm(
  wordbank_aoa_years ~ preschoolness + helpfulness + childes_adult_log_freq +
    concreteness + sona_aoa_years,
  data = complete_predictors
)
summary(pr_naive)
##
## Call:
## lm(formula = wordbank_aoa_years ~ preschoolness + helpfulness +
## childes_adult_log_freq + concreteness + sona_aoa_years, data = complete_predictors)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.41282 -0.09905 0.00566 0.11030 0.42198
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.61422 0.82091 5.621 1.48e-07 ***
## preschoolness -0.05608 0.02531 -2.216 0.0287 *
## helpfulness 0.01555 0.02884 0.539 0.5909
## childes_adult_log_freq -0.12694 0.01980 -6.412 3.79e-09 ***
## concreteness -0.39581 0.16728 -2.366 0.0197 *
## sona_aoa_years 0.08135 0.01860 4.373 2.81e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1813 on 109 degrees of freedom
## Multiple R-squared: 0.6307, Adjusted R-squared: 0.6138
## F-statistic: 37.24 on 5 and 109 DF, p-value: < 2.2e-16
# Coefficient table for the naive-adult model.
pr_naive_plot <- tidy(pr_naive) %>%
  mutate(model = "naive_adults, N = 115, R2 = .61") %>%
  arrange(term)

# NOTE(review): the three models have the same residual df and are not nested,
# so this table only reports RSS differences -- no F test is produced.
print(anova(pr_kup, pr_picturename, pr_naive))
## Analysis of Variance Table
##
## Model 1: wordbank_aoa_years ~ preschoolness + helpfulness + childes_adult_log_freq +
## concreteness + KupermanAoA
## Model 2: wordbank_aoa_years ~ preschoolness + helpfulness + childes_adult_log_freq +
## concreteness + morrison_aoa_threshold_years
## Model 3: wordbank_aoa_years ~ preschoolness + helpfulness + childes_adult_log_freq +
## concreteness + sona_aoa_years
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 109 3.6806
## 2 109 4.1455 0 -0.46489
## 3 109 3.5816 0 0.56391
# pr_childes <- lm(wordbank_threshold_age ~ preschoolness + helpfulness + childes_adult_log_freq + concreteness +
# childes_est_aoa_years, complete_predictors)
# summary(pr_childes)
# pr_childes_plot <- tidy(pr_childes) %>%
# mutate(model = "childes, N = 23, R2 = .34") %>%
# arrange(term)
# Dot-whisker plot comparing the three models that predict Wordbank AoA.
pr_plots <- bind_rows(pr_kup_plot, pr_picturename_plot, pr_naive_plot)

dwplot(
  pr_plots,
  vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2)
) %>%
  relabel_predictors(childes_adult_log_freq = "frequency") +
  theme_bw() +
  labs(x = "Coefficient Estimate", y = "") +
  geom_vline(xintercept = 0, colour = "grey60", linetype = 2) +
  labs(title = "Predicting Wordbank AoA") +
  theme(
    plot.title = element_text(face = "bold"),
    legend.position = c(0.007, 0.01),
    legend.justification = c(0, 0),
    legend.background = element_rect(colour = "grey80"),
    legend.title = element_blank()
  )
Wordbank, naive-adult, and picture-naming AoAs are all significant predictors of Kuperman AoA (though the estimate for picture-naming is much smaller).
# Kuperman AoA ~ word-level predictors + Wordbank AoA.
kup_pr <- lm(
  KupermanAoA ~ preschoolness + helpfulness + childes_adult_log_freq +
    concreteness + wordbank_aoa_years,
  data = complete_predictors
)
summary(kup_pr)
##
## Call:
## lm(formula = KupermanAoA ~ preschoolness + helpfulness + childes_adult_log_freq +
## concreteness + wordbank_aoa_years, data = complete_predictors)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.58950 -0.41479 -0.00181 0.40851 2.64383
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.59585 3.56436 0.167 0.867548
## preschoolness -0.30093 0.09941 -3.027 0.003080 **
## helpfulness -0.10741 0.10991 -0.977 0.330600
## childes_adult_log_freq -0.09414 0.08672 -1.086 0.280048
## concreteness 0.56282 0.65172 0.864 0.389707
## wordbank_aoa_years 1.34429 0.33949 3.960 0.000134 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6966 on 109 degrees of freedom
## Multiple R-squared: 0.4366, Adjusted R-squared: 0.4107
## F-statistic: 16.89 on 5 and 109 DF, p-value: 2.462e-12
# Coefficient table for the Wordbank model.
kup_pr_plot <- tidy(kup_pr) %>%
  mutate(model = "Wordbank, N = 115, R2 = .41") %>%
  arrange(term)

# Kuperman AoA ~ word-level predictors + picture-naming AoA.
kup_picturename <- lm(
  KupermanAoA ~ preschoolness + helpfulness + childes_adult_log_freq +
    concreteness + morrison_aoa_threshold_years,
  data = complete_predictors
)
summary(kup_picturename)
##
## Call:
## lm(formula = KupermanAoA ~ preschoolness + helpfulness + childes_adult_log_freq +
## concreteness + morrison_aoa_threshold_years, data = complete_predictors)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.35676 -0.45309 -0.05726 0.42251 3.04352
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.84537 3.45784 1.401 0.16397
## preschoolness -0.43518 0.10316 -4.218 5.1e-05 ***
## helpfulness -0.18225 0.11957 -1.524 0.13036
## childes_adult_log_freq -0.22720 0.08029 -2.830 0.00555 **
## concreteness 0.44308 0.68765 0.644 0.52071
## morrison_aoa_threshold_years 0.11809 0.05269 2.241 0.02705 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7284 on 109 degrees of freedom
## Multiple R-squared: 0.3839, Adjusted R-squared: 0.3557
## F-statistic: 13.59 on 5 and 109 DF, p-value: 2.667e-10
# Coefficient table for the picture-naming model.
# Label fixed: this model's adjusted R-squared is 0.3557 (not .44); the other
# plot labels in this analysis report the adjusted value as well.
# The label is hard-coded, so update it whenever the model is refit.
kup_picturename_plot <- tidy(kup_picturename) %>%
  mutate(model = "picture-naming, N = 115, R2 = .36") %>%
  arrange(term)

# Kuperman AoA ~ word-level predictors + naive-adult AoA.
kup_naive <- lm(
  KupermanAoA ~ preschoolness + helpfulness + childes_adult_log_freq +
    concreteness + sona_aoa_years,
  data = complete_predictors
)
summary(kup_naive)
##
## Call:
## lm(formula = KupermanAoA ~ preschoolness + helpfulness + childes_adult_log_freq +
## concreteness + sona_aoa_years, data = complete_predictors)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.81816 -0.39035 -0.07628 0.42448 2.87536
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.28208 3.23659 1.941 0.054845 .
## preschoolness -0.36124 0.09977 -3.621 0.000447 ***
## helpfulness -0.06336 0.11371 -0.557 0.578546
## childes_adult_log_freq -0.19567 0.07805 -2.507 0.013654 *
## concreteness -0.04383 0.65955 -0.066 0.947132
## sona_aoa_years 0.22538 0.07334 3.073 0.002675 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7147 on 109 degrees of freedom
## Multiple R-squared: 0.4069, Adjusted R-squared: 0.3797
## F-statistic: 14.96 on 5 and 109 DF, p-value: 3.644e-11
# Coefficient table for the naive-adult model.
kup_naive_plot <- tidy(kup_naive) %>%
  mutate(model = "naive_adults, N = 115, R2 = .38") %>%
  arrange(term)

# NOTE(review): non-nested models with equal residual df -- the table shows
# RSS differences only, with no F test.
print(anova(kup_pr, kup_picturename, kup_naive))
## Analysis of Variance Table
##
## Model 1: KupermanAoA ~ preschoolness + helpfulness + childes_adult_log_freq +
## concreteness + wordbank_aoa_years
## Model 2: KupermanAoA ~ preschoolness + helpfulness + childes_adult_log_freq +
## concreteness + morrison_aoa_threshold_years
## Model 3: KupermanAoA ~ preschoolness + helpfulness + childes_adult_log_freq +
## concreteness + sona_aoa_years
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 109 52.890
## 2 109 57.833 0 -4.9432
## 3 109 55.674 0 2.1593
# kup_childes <- lm(KupermanAoA ~ preschoolness + helpfulness + childes_adult_log_freq + concreteness +
# childes_est_aoa_years, complete_predictors)
# summary(kup_childes)
# kup_childes_plot <- tidy(kup_childes) %>%
# mutate(model = "childes, N = 23, R2 = .35") %>%
# arrange(term)
# Dot-whisker plot comparing the three models that predict Kuperman AoA.
kuperman_plot <- bind_rows(kup_pr_plot, kup_picturename_plot, kup_naive_plot)

dwplot(
  kuperman_plot,
  vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2)
) %>%
  relabel_predictors(childes_adult_log_freq = "frequency") +
  theme_bw() +
  labs(x = "Coefficient Estimate", y = "") +
  geom_vline(xintercept = 0, colour = "grey60", linetype = 2) +
  labs(title = "Predicting Kuperman AoA") +
  theme(
    plot.title = element_text(face = "bold"),
    legend.position = c(0.525, 0.01),
    legend.justification = c(0, 0),
    legend.background = element_rect(colour = "grey80"),
    legend.title = element_blank()
  )
Kuperman predicts picture-naming; Wordbank & naive adult ratings do not.
# Picture-naming AoA ~ word-level predictors + Wordbank AoA.
pn_pr <- lm(
  morrison_aoa_threshold_years ~ preschoolness + helpfulness +
    childes_adult_log_freq + concreteness + wordbank_aoa_years,
  data = complete_predictors
)
summary(pn_pr)
##
## Call:
## lm(formula = morrison_aoa_threshold_years ~ preschoolness + helpfulness +
## childes_adult_log_freq + concreteness + wordbank_aoa_years,
## data = complete_predictors)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.4996 -0.6993 -0.1218 0.6529 3.8487
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.5126 6.7228 2.456 0.01562 *
## preschoolness 0.4336 0.1875 2.312 0.02263 *
## helpfulness 0.6265 0.2073 3.022 0.00313 **
## childes_adult_log_freq -0.7235 0.1636 -4.423 2.31e-05 ***
## concreteness -2.6099 1.2292 -2.123 0.03600 *
## wordbank_aoa_years 0.8342 0.6403 1.303 0.19540
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.314 on 109 degrees of freedom
## Multiple R-squared: 0.3694, Adjusted R-squared: 0.3405
## F-statistic: 12.77 on 5 and 109 DF, p-value: 8.982e-10
# Coefficient table for the Wordbank model.
# Label fixed: the fit has 109 residual df and 6 coefficients, i.e. N = 115
# (the label previously said 123). Hard-coded -- update if the model is refit.
pn_pr_plot <- tidy(pn_pr) %>%
  mutate(model = "Wordbank, N = 115, R2 = .34") %>%
  arrange(term)

# Picture-naming AoA ~ word-level predictors + Kuperman AoA.
pn_kup <- lm(
  morrison_aoa_threshold_years ~ preschoolness + helpfulness +
    childes_adult_log_freq + concreteness + KupermanAoA,
  data = complete_predictors
)
summary(pn_kup)
##
## Call:
## lm(formula = morrison_aoa_threshold_years ~ preschoolness + helpfulness +
## childes_adult_log_freq + concreteness + KupermanAoA, data = complete_predictors)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.4615 -0.8758 -0.1451 0.5611 3.7114
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.9464 5.9574 3.012 0.00322 **
## preschoolness 0.5236 0.1913 2.738 0.00723 **
## helpfulness 0.6663 0.2051 3.249 0.00154 **
## childes_adult_log_freq -0.7468 0.1294 -5.771 7.49e-08 ***
## concreteness -2.9342 1.1917 -2.462 0.01538 *
## KupermanAoA 0.3730 0.1664 2.241 0.02705 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.295 on 109 degrees of freedom
## Multiple R-squared: 0.3878, Adjusted R-squared: 0.3597
## F-statistic: 13.81 on 5 and 109 DF, p-value: 1.919e-10
# Coefficient table for the Kuperman model.
# Label fixed: the fit has 109 residual df and 6 coefficients, i.e. N = 115
# (the label previously said 123). Hard-coded -- update if the model is refit.
pn_kup_plot <- tidy(pn_kup) %>%
  mutate(model = "kuperman, N = 115, R2 = .36") %>%
  arrange(term)

# Picture-naming AoA ~ word-level predictors + naive-adult AoA.
pn_naive <- lm(
  morrison_aoa_threshold_years ~ preschoolness + helpfulness +
    childes_adult_log_freq + concreteness + sona_aoa_years,
  data = complete_predictors
)
summary(pn_naive)
##
## Call:
## lm(formula = morrison_aoa_threshold_years ~ preschoolness + helpfulness +
## childes_adult_log_freq + concreteness + sona_aoa_years, data = complete_predictors)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.8199 -0.6746 -0.1121 0.6761 3.8652
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 20.60917 5.99591 3.437 0.000833 ***
## preschoolness 0.37955 0.18483 2.054 0.042412 *
## helpfulness 0.62836 0.21065 2.983 0.003524 **
## childes_adult_log_freq -0.86250 0.14459 -5.965 3.08e-08 ***
## concreteness -2.90437 1.22183 -2.377 0.019195 *
## sona_aoa_years 0.01228 0.13586 0.090 0.928139
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.324 on 109 degrees of freedom
## Multiple R-squared: 0.3596, Adjusted R-squared: 0.3303
## F-statistic: 12.24 on 5 and 109 DF, p-value: 1.998e-09
# Coefficient table for the naive-adult model.
# Label fixed: the fit has 109 residual df and 6 coefficients, i.e. N = 115
# (the label previously said 123). Hard-coded -- update if the model is refit.
pn_naive_plot <- tidy(pn_naive) %>%
  mutate(model = "naive_adults, N = 115, R2 = .33") %>%
  arrange(term)

# NOTE(review): non-nested models with equal residual df -- the table shows the
# RSS difference only, with no F test. pn_naive is not included here, unlike
# the corresponding comparisons for the other two outcomes; confirm intended.
print(anova(pn_pr, pn_kup))
## Analysis of Variance Table
##
## Model 1: morrison_aoa_threshold_years ~ preschoolness + helpfulness +
## childes_adult_log_freq + concreteness + wordbank_aoa_years
## Model 2: morrison_aoa_threshold_years ~ preschoolness + helpfulness +
## childes_adult_log_freq + concreteness + KupermanAoA
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 109 188.15
## 2 109 182.66 0 5.4872
# Dot-whisker plot comparing the three models that predict picture-naming AoA.
pn_plot <- bind_rows(pn_pr_plot, pn_kup_plot, pn_naive_plot)

dwplot(
  pn_plot,
  vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2)
) %>%
  relabel_predictors(childes_adult_log_freq = "frequency") +
  theme_bw() +
  labs(x = "Coefficient Estimate", y = "") +
  geom_vline(xintercept = 0, colour = "grey60", linetype = 2) +
  labs(title = "Predicting picture-naming AoA") +
  theme(
    plot.title = element_text(face = "bold"),
    legend.position = c(0.007, 0.01),
    legend.justification = c(0, 0),
    legend.background = element_rect(colour = "grey80"),
    legend.title = element_blank()
  )
# Pairwise scatterplots of the AoA measures (one labelled point per word) on a
# common 0-10 year scale, with the identity line for reference.

# Wordbank vs. Kuperman
ggplot(complete_predictors, aes(x = wordbank_aoa_years, y = KupermanAoA, label = word)) +
  geom_point() +
  geom_label() +
  geom_abline(slope = 1, intercept = 0) +
  theme_classic() +
  xlim(0, 10) +
  ylim(0, 10)

# Picture-naming vs. Kuperman
ggplot(complete_predictors, aes(x = morrison_aoa_threshold_years, y = KupermanAoA, label = word)) +
  geom_point() +
  geom_label() +
  geom_abline(slope = 1, intercept = 0) +
  theme_classic() +
  xlim(0, 10) +
  ylim(0, 10)

# Picture-naming vs. Wordbank
ggplot(complete_predictors, aes(x = morrison_aoa_threshold_years, y = wordbank_aoa_years, label = word)) +
  geom_point() +
  geom_label() +
  geom_abline(slope = 1, intercept = 0) +
  theme_classic() +
  xlim(0, 10) +
  ylim(0, 10)

# Naive adult vs. picture-naming
ggplot(complete_predictors, aes(x = sona_aoa_years, y = morrison_aoa_threshold_years, label = word)) +
  geom_point() +
  geom_label() +
  geom_abline(slope = 1, intercept = 0) +
  theme_classic() +
  xlim(0, 10) +
  ylim(0, 10)

# Wordbank vs. naive adult
ggplot(complete_predictors, aes(x = wordbank_aoa_years, y = sona_aoa_years, label = word)) +
  geom_point() +
  geom_label() +
  geom_abline(slope = 1, intercept = 0) +
  theme_classic() +
  xlim(0, 10) +
  ylim(0, 10)

# Naive adult vs. Kuperman
ggplot(complete_predictors, aes(x = sona_aoa_years, y = KupermanAoA, label = word)) +
  geom_point() +
  geom_label() +
  geom_abline(slope = 1, intercept = 0) +
  theme_classic() +
  xlim(0, 10) +
  ylim(0, 10)

# Distributions of the pairwise AoA difference scores.
hist(complete_predictors$wordbank_minus_kuperman)
hist(complete_predictors$kuperman_minus_pn)
hist(complete_predictors$wordbank_minus_pn)
hist(complete_predictors$wordbank_minus_sona)
hist(complete_predictors$sona_minus_pn)
hist(complete_predictors$sona_minus_kuperman)
# (Wordbank - Kuperman) difference regressed on the word-level predictors.
pr_k_full_fm <- write_model_formula("wordbank_minus_kuperman")
pr_k_full <- lm(formula = pr_k_full_fm, data = complete_predictors)
summary(pr_k_full)
##
## Call:
## lm(formula = pr_k_full_fm, data = complete_predictors)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.71841 -0.41582 0.02148 0.42687 1.56469
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.30918 3.13894 -0.736 0.46351
## preschoolness 0.32388 0.09681 3.346 0.00112 **
## helpfulness 0.10765 0.10992 0.979 0.32958
## childes_adult_log_freq 0.15453 0.06305 2.451 0.01583 *
## concreteness -0.44455 0.64128 -0.693 0.48963
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6967 on 110 degrees of freedom
## Multiple R-squared: 0.2016, Adjusted R-squared: 0.1725
## F-statistic: 6.943 on 4 and 110 DF, p-value: 5.071e-05
# Coefficient table and dot-whisker plot for the Wordbank - Kuperman model.
pr_k_full_plot <- tidy(pr_k_full) %>%
  mutate(model = "Full model, R2 = .17") %>%
  arrange(term)

#pr_k_allmodels_plot <- bind_rows(pr_k_full_plot, pr_k_nohyper_plot, pr_k_reduced_plot) %>% filter(term != "(Intercept)")

dwplot(
  pr_k_full_plot,
  vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2)
) %>%
  relabel_predictors(childes_adult_log_freq = "frequency") +
  theme_bw() +
  labs(x = "Coefficient Estimate", y = "") +
  geom_vline(xintercept = 0, colour = "grey60", linetype = 2) +
  labs(title = "Predicting Wordbank - Kuperman") +
  theme(
    plot.title = element_text(face = "bold"),
    legend.position = c(0.007, 0.01),
    legend.justification = c(0, 0),
    legend.background = element_rect(colour = "grey80"),
    legend.title = element_blank()
  )
# (Kuperman - picture-naming) difference regressed on the word-level predictors.
pn_k_full_fm <- write_model_formula("kuperman_minus_pn")
pn_k_full <- lm(formula = pn_k_full_fm, data = complete_predictors)
summary(pn_k_full)
##
## Call:
## lm(formula = pn_k_full_fm, data = complete_predictors)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.7753 -0.8172 0.1200 0.8702 3.8537
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -13.3782 6.1725 -2.167 0.03236 *
## preschoolness -0.7685 0.1904 -4.037 0.00010 ***
## helpfulness -0.7342 0.2162 -3.397 0.00095 ***
## childes_adult_log_freq 0.5399 0.1240 4.354 3.01e-05 ***
## concreteness 2.9975 1.2610 2.377 0.01918 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.37 on 110 degrees of freedom
## Multiple R-squared: 0.2585, Adjusted R-squared: 0.2315
## F-statistic: 9.586 on 4 and 110 DF, p-value: 1.094e-06
# Coefficient table for the Kuperman - picture-naming model.
pn_k_full_plot <- tidy(pn_k_full) %>%
  mutate(model = "Full model, R2 = .23") %>%
  arrange(term)

# pn_k_backwards <- lm(kuperman_minus_pn ~ preschoolness+helpfulness+childes_adult_log_freq, complete_predictors)
# summary(pn_k_backwards)
# pn_k_backwards_plot <- tidy(pn_k_backwards) %>%
# mutate(model = "Backward, R2 = .20") %>%
# arrange(term)

# Same outcome with mean-centered predictors and a
# preschoolness x frequency interaction.
k_pn_interact <- lm(
  kuperman_minus_pn ~ preschoolness_centered * frequency_centered +
    helpfulness_centered + concreteness_centered,
  data = complete_predictors
)
summary(k_pn_interact)
##
## Call:
## lm(formula = kuperman_minus_pn ~ preschoolness_centered * frequency_centered +
## helpfulness_centered + concreteness_centered, data = complete_predictors)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.9775 -0.8005 0.0971 0.8564 3.8618
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.9971 0.1353 7.367 3.55e-11
## preschoolness_centered -0.7584 0.1933 -3.924 0.000153
## frequency_centered 0.5425 0.1247 4.350 3.07e-05
## helpfulness_centered -0.7202 0.2206 -3.265 0.001465
## concreteness_centered 2.9392 1.2767 2.302 0.023227
## preschoolness_centered:frequency_centered -0.0582 0.1643 -0.354 0.723850
##
## (Intercept) ***
## preschoolness_centered ***
## frequency_centered ***
## helpfulness_centered **
## concreteness_centered *
## preschoolness_centered:frequency_centered
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.375 on 109 degrees of freedom
## Multiple R-squared: 0.2593, Adjusted R-squared: 0.2254
## F-statistic: 7.633 on 5 and 109 DF, p-value: 3.5e-06
# Coefficient table for the interaction model.
k_pn_interact_plot <- tidy(k_pn_interact) %>%
  mutate(model = "Full model, R2 = .23") %>%
  arrange(term)

# pn_k_allmodels_plot <- bind_rows(pn_k_full_plot, pn_k_backwards_plot) %>%
# filter(term != "(Intercept)")

# Dot-whisker plot of the additive Kuperman - picture-naming model (intercept
# dropped).
# NOTE(review): the pos_scale_* relabels do not match any term in this model's
# formula -- presumably left over from an earlier predictor set; confirm.
dwplot(
  filter(pn_k_full_plot, term != "(Intercept)"),
  vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2)
) %>%
  relabel_predictors(c(
    pos_scale_n_defs = "N defs",
    pos_scale_hypernyms = "hypernyms",
    pos_scale_hyponyms = "hyponyms",
    pos_scale_n_synsets = "N synsets",
    childes_adult_log_freq = "frequency"
  )) +
  theme_bw() +
  labs(x = "Coefficient Estimate", y = "") +
  geom_vline(xintercept = 0, colour = "grey60", linetype = 2) +
  labs(title = "Predicting Kuperman - Picture-naming") +
  theme(
    plot.title = element_text(face = "bold"),
    legend.position = c(0.007, 0.01),
    legend.justification = c(0, 0),
    legend.background = element_rect(colour = "grey80"),
    legend.title = element_blank()
  )
# (Wordbank - picture-naming) difference regressed on the word-level predictors.
pn_pr_full_fm <- write_model_formula("wordbank_minus_pn")
pn_pr_full <- lm(formula = pn_pr_full_fm, data = complete_predictors)
summary(pn_pr_full)
##
## Call:
## lm(formula = pn_pr_full_fm, data = complete_predictors)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.8428 -0.6910 0.1156 0.7098 2.4371
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -15.6874 5.8945 -2.661 0.00895 **
## preschoolness -0.4446 0.1818 -2.446 0.01604 *
## helpfulness -0.6266 0.2064 -3.035 0.00300 **
## childes_adult_log_freq 0.6944 0.1184 5.865 4.8e-08 ***
## concreteness 2.5530 1.2042 2.120 0.03626 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.308 on 110 degrees of freedom
## Multiple R-squared: 0.2805, Adjusted R-squared: 0.2544
## F-statistic: 10.72 on 4 and 110 DF, p-value: 2.243e-07
# Coefficient table for the Wordbank - picture-naming model.
pn_pr_full_plot <- tidy(pn_pr_full) %>%
  mutate(model = "Full model, R2 = .25") %>%
  arrange(term)

# pn_pr_backwards <- lm(pn_minus_parentreport ~ helpfulness+childes_adult_log_freq+concreteness, complete_predictors)
# summary(pn_pr_backwards)
# pn_pr_backwards_plot <- tidy(pn_pr_backwards) %>%
# mutate(model = "Backward, R2 = .34") %>%
# arrange(term)
# pn_pr_allmodels_plot <- bind_rows(pn_pr_full_plot, pn_pr_backwards_plot) %>%
# filter(term != "(Intercept)")

# Dot-whisker plot of the model (intercept dropped).
# NOTE(review): the pos_scale_* relabels do not match any term in this model's
# formula -- presumably left over from an earlier predictor set; confirm.
dwplot(
  filter(pn_pr_full_plot, term != "(Intercept)"),
  vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2)
) %>%
  relabel_predictors(c(
    pos_scale_n_defs = "N defs",
    pos_scale_hypernyms = "hypernyms",
    pos_scale_hyponyms = "hyponyms",
    pos_scale_n_synsets = "N synsets",
    childes_adult_log_freq = "frequency"
  )) +
  theme_bw() +
  labs(x = "Coefficient Estimate", y = "") +
  geom_vline(xintercept = 0, colour = "grey60", linetype = 2) +
  labs(title = "Predicting Wordbank - Picture-naming") +
  theme(
    plot.title = element_text(face = "bold"),
    legend.position = c(0.007, 0.01),
    legend.justification = c(0, 0),
    legend.background = element_rect(colour = "grey80"),
    legend.title = element_blank()
  )
# (Wordbank - naive adult) difference regressed on the word-level predictors.
pr_sona_full_fm <- write_model_formula("wordbank_minus_sona")
pr_sona_full <- lm(formula = pr_sona_full_fm, data = complete_predictors)
summary(pr_sona_full)
##
## Call:
## lm(formula = pr_sona_full_fm, data = complete_predictors)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.9935 -0.6768 0.1137 0.5900 1.9150
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.52377 3.93096 0.133 0.894
## preschoolness 0.06335 0.12124 0.523 0.602
## helpfulness 0.19887 0.13766 1.445 0.151
## childes_adult_log_freq 0.42026 0.07896 5.322 5.47e-07 ***
## concreteness -0.98628 0.80309 -1.228 0.222
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8725 on 110 degrees of freedom
## Multiple R-squared: 0.2688, Adjusted R-squared: 0.2423
## F-statistic: 10.11 on 4 and 110 DF, p-value: 5.233e-07
# Coefficient table for the Wordbank - naive adult model.
pr_sona_full_plot <- tidy(pr_sona_full) %>%
  mutate(model = "Full model, R2 = .24") %>%
  arrange(term)

# pn_pr_backwards <- lm(pn_minus_parentreport ~ helpfulness+childes_adult_log_freq+concreteness, complete_predictors)
# summary(pn_pr_backwards)
# pn_pr_backwards_plot <- tidy(pn_pr_backwards) %>%
# mutate(model = "Backward, R2 = .34") %>%
# arrange(term)

# Dot-whisker plot of the model (intercept dropped).
# NOTE(review): the pos_scale_* relabels do not match any term in this model's
# formula -- presumably left over from an earlier predictor set; confirm.
dwplot(
  filter(pr_sona_full_plot, term != "(Intercept)"),
  vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2)
) %>%
  relabel_predictors(c(
    pos_scale_n_defs = "N defs",
    pos_scale_hypernyms = "hypernyms",
    pos_scale_hyponyms = "hyponyms",
    pos_scale_n_synsets = "N synsets",
    childes_adult_log_freq = "frequency"
  )) +
  theme_bw() +
  labs(x = "Coefficient Estimate", y = "") +
  geom_vline(xintercept = 0, colour = "grey60", linetype = 2) +
  labs(title = "Predicting Wordbank - Naive adult") +
  theme(
    plot.title = element_text(face = "bold"),
    legend.position = c(0.007, 0.01),
    legend.justification = c(0, 0),
    legend.background = element_rect(colour = "grey80"),
    legend.title = element_blank()
  )

library(ggrepel)

# Word-level view: Wordbank - naive adult difference against log frequency,
# with a linear fit and non-overlapping word labels.
ggplot(complete_predictors, aes(x = wordbank_minus_sona, y = childes_adult_log_freq, label = word)) +
  geom_point() +
  geom_text_repel(segment.alpha = .3, size = 3) +
  theme_classic() +
  geom_smooth(method = "lm")

# Side-by-side coefficients for the two difference models that use
# picture-naming AoA as the reference measure (intercepts dropped).
k_pn_comparison_plot <- tidy(pn_k_full) %>%
  mutate(model = "Kuperman-PN, R2 = .23") %>%
  arrange(term)
w_pn_comparison_plot <- tidy(pn_pr_full) %>%
  mutate(model = "Wordbank-PN, R2 = .25") %>%
  arrange(term)
comparison_plots <- bind_rows(k_pn_comparison_plot, w_pn_comparison_plot) %>%
  filter(term != "(Intercept)")

dwplot(
  comparison_plots,
  vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2)
) %>%
  relabel_predictors(childes_adult_log_freq = "frequency") +
  theme_bw() +
  labs(x = "Coefficient Estimate", y = "") +
  geom_vline(xintercept = 0, colour = "grey60", linetype = 2) +
  labs(title = "Predicting subj - ground truth measures (N = 115)") +
  theme(
    plot.title = element_text(face = "bold"),
    legend.position = c(0.69, 0.01),
    legend.justification = c(0, 0),
    legend.background = element_rect(colour = "grey80"),
    legend.title = element_blank()
  )