library(knitr)
# Global knitr chunk defaults for this document: echo code, hide messages and
# warnings, do not continue past errors, cache chunk results, leave code
# untidied, and use 12 x 10 figures.
# Logical values are spelled out as TRUE/FALSE because T and F are ordinary
# variables that can be reassigned.
opts_chunk$set(
  echo = TRUE, message = FALSE, warning = FALSE,
  error = FALSE, cache = TRUE, tidy = FALSE,
  fig.width = 12, fig.height = 10
)
library(tidyverse)
library(langcog)
library(data.table)
library(feather)
# Use the classic ggplot2 theme with a 10pt base font for every plot below.
theme_set(theme_classic(base_size = 10))

# Distribution of various vocabulary measures by-kid. Facets are ordered by
# MTLD/MLU, residualizing out mtld/mlu at t1, age_t1, age_diff,
# log_transcript_length_t1, log_transcript_length_t2, log_freq_t1.
# Vocabulary measures ----
# Distances from data/childes_embedding_dist.csv. `type` and `dist` are
# renamed to measure_name / value, the same column names the other measure
# tables in this script use.
childes_embedding <- read_csv("data/childes_embedding_dist.csv") %>%
  rename(value = dist, measure_name = type)
# Measures from data/mcrae_dist.csv: keep the child id plus the four
# "_No_Tax" columns, then stack those columns into long form so each row is
# one (target_child_id, measure_name, value) triple.
mcrae <- read_csv("data/mcrae_dist.csv") %>%
  select(
    target_child_id,
    Density_No_Tax,
    Num_Corred_Pairs_No_Tax,
    Num_Feats_No_Tax,
    Num_Disting_Feats_No_Tax
  ) %>%
  gather(key = "measure_name", value = "value", -target_child_id)
# Transition probabilities from data/swow_dist.csv: keep the child id and
# trans_prob (renamed to value), drop missing values, and label every row
# with the measure name "swow_trans_prob".
swow <- read_csv("data/swow_dist.csv") %>%
  select(target_child_id, value = trans_prob) %>%
  filter(!is.na(value)) %>%
  mutate(measure_name = "swow_trans_prob")
# Stack the three measure tables row-wise into one long table.
all_vocab <- bind_rows(childes_embedding, mcrae, swow)
# Outcome variables ----
# Only the child id and the two residualized outcomes are kept.
outcome <- select(
  read_csv("data/outcome_variables.csv"),
  target_child_id, mtld_resid, mlu_resid
)
# Attach the outcome residuals to every vocabulary-measure row. No `by` is
# given, so the join matches on every column name the two tables share.
# Rows with no measure_name (i.e. rows that came only from `outcome`) are
# dropped afterwards.
full_df <- all_vocab %>%
  full_join(outcome) %>%
  filter(!is.na(measure_name))

# pairwise_dist: per-child density of `value`, facets ordered by mtld_resid.
# (The statement above and this pipeline were fused on one line, which does
# not parse; they are now separate statements.)
full_df %>%
mutate(target_child_id = reorder(target_child_id, mtld_resid)) %>% # facet order
  filter(measure_name == "pairwise_dist") %>%
  ggplot(aes(x = value, fill = mtld_resid)) +
  geom_density() +
  # One panel per child, each with its own x and y scales.
  facet_wrap(~ target_child_id, scales = "free", ncol = 9) +
  # Hide the facet strips (background and label text).
  theme(
    strip.background = element_blank(),
    strip.text.x = element_blank()
  )

# pairwise_dist: per-child density of `value`, facets ordered by mlu_resid.
# (Previously fused onto the closing line of the plot above, which does not
# parse.)
full_df %>%
mutate(target_child_id = reorder(target_child_id, mlu_resid)) %>% # facet order
  filter(measure_name == "pairwise_dist") %>%
  ggplot(aes(x = value, fill = mlu_resid)) +
  geom_density() +
  # One panel per child, each with its own x and y scales.
  facet_wrap(~ target_child_id, scales = "free", ncol = 9) +
  # Hide the facet strips (background and label text).
  theme(
    strip.background = element_blank(),
    strip.text.x = element_blank()
  )

# frequency_resid: per-child density of `value`, facets ordered by mtld_resid.
# (Previously fused onto the closing line of the plot above, which does not
# parse.)
full_df %>%
mutate(target_child_id = reorder(target_child_id, mtld_resid)) %>% # facet order
  filter(measure_name == "frequency_resid") %>%
  ggplot(aes(x = value, fill = mtld_resid)) +
  geom_density() +
  # One panel per child, each with its own x and y scales.
  facet_wrap(~ target_child_id, scales = "free", ncol = 9) +
  # Hide the facet strips (background and label text).
  theme(
    strip.background = element_blank(),
    strip.text.x = element_blank()
  )

# frequency_resid: per-child density of `value`, facets ordered by mlu_resid.
# (Previously fused onto the closing line of the plot above, which does not
# parse.)
full_df %>%
mutate(target_child_id = reorder(target_child_id, mlu_resid)) %>% # facet order
  filter(measure_name == "frequency_resid") %>%
  ggplot(aes(x = value, fill = mlu_resid)) +
  geom_density() +
  # One panel per child, each with its own x and y scales.
  facet_wrap(~ target_child_id, scales = "free", ncol = 9) +
  # Hide the facet strips (background and label text).
  theme(
    strip.background = element_blank(),
    strip.text.x = element_blank()
  )

# swow_trans_prob: per-child density of log(value), facets ordered by
# mtld_resid. (Previously fused onto the closing line of the plot above,
# which does not parse.)
full_df %>%
mutate(target_child_id = reorder(target_child_id, mtld_resid)) %>% # facet order
  filter(measure_name == "swow_trans_prob") %>%
  # The x axis shows the natural log of the transition probability.
  ggplot(aes(x = log(value), fill = mtld_resid)) +
  geom_density() +
  # One panel per child, each with its own x and y scales.
  facet_wrap(~ target_child_id, scales = "free", ncol = 9) +
  # Hide the facet strips (background and label text).
  theme(
    strip.background = element_blank(),
    strip.text.x = element_blank()
  )

# swow_trans_prob: per-child density of log(value), facets ordered by
# mlu_resid. (Previously fused onto the closing line of the plot above,
# which does not parse.)
full_df %>%
mutate(target_child_id = reorder(target_child_id, mlu_resid)) %>% # facet order
  filter(measure_name == "swow_trans_prob") %>%
  # The x axis shows the natural log of the transition probability.
  ggplot(mapping = aes(x = log(value), fill = mlu_resid)) +
  geom_density() +
  # One panel per child, each with its own x and y scales.
  facet_wrap(~ target_child_id, ncol = 9, scales = "free") +
  # Hide the facet strips (label text and background).
  theme(
    strip.text.x = element_blank(),
    strip.background = element_blank()
  )

## Num_Corred_Pairs_No_Tax {.tabset}
### MTLD
# Num_Corred_Pairs_No_Tax: per-child density of `value`; children (facets)
# are ordered by mtld_resid, which also drives the fill colour.
full_df %>%
  mutate(target_child_id = reorder(target_child_id, mtld_resid)) %>%
  filter(measure_name == "Num_Corred_Pairs_No_Tax") %>%
  ggplot(mapping = aes(x = value, fill = mtld_resid)) +
  geom_density() +
  # One panel per child, each with its own x and y scales.
  facet_wrap(~ target_child_id, ncol = 9, scales = "free") +
  # Hide the facet strips (label text and background).
  theme(
    strip.text.x = element_blank(),
    strip.background = element_blank()
  )

### MLU
# Num_Corred_Pairs_No_Tax: per-child density of `value`; children (facets)
# are ordered by mlu_resid, which also drives the fill colour.
full_df %>%
  mutate(target_child_id = reorder(target_child_id, mlu_resid)) %>%
  filter(measure_name == "Num_Corred_Pairs_No_Tax") %>%
  ggplot(mapping = aes(x = value, fill = mlu_resid)) +
  geom_density() +
  # One panel per child, each with its own x and y scales.
  facet_wrap(~ target_child_id, ncol = 9, scales = "free") +
  # Hide the facet strips (label text and background).
  theme(
    strip.text.x = element_blank(),
    strip.background = element_blank()
  )