library(knitr)

# Global knitr chunk defaults: show code, hide messages/warnings/errors,
# cache chunk results, leave code formatting alone, and use 12 x 10 figures.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables rather than reserved words.
opts_chunk$set(
  echo = TRUE, message = FALSE, warning = FALSE,
  error = FALSE, cache = TRUE, tidy = FALSE,
  fig.width = 12, fig.height = 10
)

library(tidyverse)
library(langcog)
library(data.table)
library(feather)

# Default ggplot2 theme for subsequent plots: classic theme, base font size 10.
theme_set(
  theme_classic(base_size = 10)
)

Distribution of various vocabulary measures by kid. Facets are ordered by MTLD/MLU after residualizing out MTLD/MLU at t1, age_t1, age_diff, log_transcript_length_t1, log_transcript_length_t2, and log_freq_t1.

# vocab measures
# CHILDES embedding distances, with `type`/`dist` renamed to the shared
# long-format column names `measure_name`/`value`.
childes_embedding <- rename(
  read_csv("data/childes_embedding_dist.csv"),
  measure_name = type,
  value = dist
)
# McRae feature measures: keep the child id plus the four *_No_Tax columns,
# then stack them into long format (one row per child x measure).
mcrae <- read_csv("data/mcrae_dist.csv") %>%
  select(
    target_child_id,
    Density_No_Tax,
    Num_Corred_Pairs_No_Tax,
    Num_Feats_No_Tax,
    Num_Disting_Feats_No_Tax
  ) %>%
  gather(key = "measure_name", value = "value", -target_child_id)

# SWOW transitional probabilities: drop rows with a missing trans_prob, keep
# only the child id and the probability (renamed to `value`), and tag every
# row with the measure name used in the combined table.
swow <- read_csv("data/swow_dist.csv") %>%
  filter(!is.na(trans_prob)) %>%
  select(target_child_id, value = trans_prob) %>%
  mutate(measure_name = "swow_trans_prob")

# Stack the three vocabulary-measure tables into one long table.
all_vocab <- bind_rows(childes_embedding, mcrae, swow)

# outcome variables
# Only the child id and the two residualized outcomes are kept.
outcome <- select(
  read_csv("data/outcome_variables.csv"),
  target_child_id, mtld_resid, mlu_resid
)

# Attach the outcome variables to every vocabulary-measure row.
# The join key is spelled out so the join cannot silently pick up any other
# column the two tables might happen to share.
full_df <- all_vocab %>%
  full_join(outcome, by = "target_child_id") %>%
  # Rows without a measure_name (e.g. children present only in `outcome`)
  # have nothing to plot, so drop them.
  filter(!is.na(measure_name))

Pairwise distance from the CHILDES embedding model

MTLD

full_df %>%
  # Order child ids by mtld_resid (reorder() sorts levels by the per-child
  # mean), so facets run from the lowest to the highest residual.
  mutate(target_child_id = reorder(target_child_id, mtld_resid)) %>%
  filter(measure_name == "pairwise_dist") %>%
  ggplot(mapping = aes(x = value, fill = mtld_resid)) +
  geom_density() +
  facet_wrap(facets = ~ target_child_id, ncol = 9, scales = "free") +
  # Blank out the facet strips so the per-child labels are not drawn.
  theme(
    strip.text.x = element_blank(),
    strip.background = element_blank()
  )

MLU

full_df %>%
  # Order child ids by mlu_resid (reorder() sorts levels by the per-child
  # mean), so facets run from the lowest to the highest residual.
  mutate(target_child_id = reorder(target_child_id, mlu_resid)) %>%
  filter(measure_name == "pairwise_dist") %>%
  ggplot(mapping = aes(x = value, fill = mlu_resid)) +
  geom_density() +
  facet_wrap(facets = ~ target_child_id, ncol = 9, scales = "free") +
  # Blank out the facet strips so the per-child labels are not drawn.
  theme(
    strip.text.x = element_blank(),
    strip.background = element_blank()
  )

Pairwise distance from the CHILDES embedding model, frequency-residualized

MTLD

full_df %>%
  # Order child ids by mtld_resid (reorder() sorts levels by the per-child
  # mean), so facets run from the lowest to the highest residual.
  mutate(target_child_id = reorder(target_child_id, mtld_resid)) %>%
  filter(measure_name == "frequency_resid") %>%
  ggplot(mapping = aes(x = value, fill = mtld_resid)) +
  geom_density() +
  facet_wrap(facets = ~ target_child_id, ncol = 9, scales = "free") +
  # Blank out the facet strips so the per-child labels are not drawn.
  theme(
    strip.text.x = element_blank(),
    strip.background = element_blank()
  )

MLU

full_df %>%
  # Order child ids by mlu_resid (reorder() sorts levels by the per-child
  # mean), so facets run from the lowest to the highest residual.
  mutate(target_child_id = reorder(target_child_id, mlu_resid)) %>%
  filter(measure_name == "frequency_resid") %>%
  ggplot(mapping = aes(x = value, fill = mlu_resid)) +
  geom_density() +
  facet_wrap(facets = ~ target_child_id, ncol = 9, scales = "free") +
  # Blank out the facet strips so the per-child labels are not drawn.
  theme(
    strip.text.x = element_blank(),
    strip.background = element_blank()
  )

SWOW forward transitional probability (log-transformed)

MTLD

full_df %>%
  # Order child ids by mtld_resid (reorder() sorts levels by the per-child
  # mean), so facets run from the lowest to the highest residual.
  mutate(target_child_id = reorder(target_child_id, mtld_resid)) %>%
  filter(measure_name == "swow_trans_prob") %>%
  # The x axis shows the natural log of the transitional probability.
  ggplot(mapping = aes(x = log(value), fill = mtld_resid)) +
  geom_density() +
  facet_wrap(facets = ~ target_child_id, ncol = 9, scales = "free") +
  # Blank out the facet strips so the per-child labels are not drawn.
  theme(
    strip.text.x = element_blank(),
    strip.background = element_blank()
  )

MLU

full_df %>%
  # Order child ids by mlu_resid (reorder() sorts levels by the per-child
  # mean), so facets run from the lowest to the highest residual.
  mutate(target_child_id = reorder(target_child_id, mlu_resid)) %>%
  filter(measure_name == "swow_trans_prob") %>%
  # The x axis shows the natural log of the transitional probability.
  ggplot(mapping = aes(x = log(value), fill = mlu_resid)) +
  geom_density() +
  facet_wrap(facets = ~ target_child_id, ncol = 9, scales = "free") +
  # Blank out the facet strips so the per-child labels are not drawn.
  theme(
    strip.text.x = element_blank(),
    strip.background = element_blank()
  )

## Num_Corred_Pairs_No_Tax {.tabset}

### MTLD
full_df %>%
  # Order child ids by mtld_resid (reorder() sorts levels by the per-child
  # mean), so facets run from the lowest to the highest residual.
  mutate(target_child_id = reorder(target_child_id, mtld_resid)) %>%
  filter(measure_name == "Num_Corred_Pairs_No_Tax") %>%
  ggplot(mapping = aes(x = value, fill = mtld_resid)) +
  geom_density() +
  facet_wrap(facets = ~ target_child_id, ncol = 9, scales = "free") +
  # Blank out the facet strips so the per-child labels are not drawn.
  theme(
    strip.text.x = element_blank(),
    strip.background = element_blank()
  )
### MLU
full_df %>%
  # Order child ids by mlu_resid (reorder() sorts levels by the per-child
  # mean), so facets run from the lowest to the highest residual.
  mutate(target_child_id = reorder(target_child_id, mlu_resid)) %>%
  filter(measure_name == "Num_Corred_Pairs_No_Tax") %>%
  ggplot(mapping = aes(x = value, fill = mlu_resid)) +
  geom_density() +
  facet_wrap(facets = ~ target_child_id, ncol = 9, scales = "free") +
  # Blank out the facet strips so the per-child labels are not drawn.
  theme(
    strip.text.x = element_blank(),
    strip.background = element_blank()
  )