MTLD words by usefulness

Do kids who know more words at T2 (using the MTLD measure) know words that are more “useful” at T1? In particular, we define useful words as words that allow you to make more trigrams in Switchboard (http://www.anc.org/data/oanc/contents/).

Group level

# Load the child-to-group lookup and the per-child word types.
# Time bins are relabelled ("low" -> t1, "high" -> t2) and every gloss is
# lower-cased so that later word matching is case-insensitive.
groups_info <- read_csv("../1_exploration/groups_info.csv")
target_types <- read_csv("../1_exploration/target_types_delta_450_1150.csv") %>%
  mutate(
    tbin = fct_recode(tbin, t1 = "low", t2 = "high"),
    gloss = tolower(gloss)
  )

# Maximum number of word types kept for each delta_resid_group.
TOPNWORDS_PER_GROUP <- 500

# Most frequent t1 word types per group, with their row counts (n).
# Rows are sorted by count *before* truncating, so when several glosses tie at
# the cut-off the ones dropped are always lowest-count rows. Previously
# top_n() kept all ties and the following slice() cut the list in alphabetical
# gloss order, which could discard higher-count words.
t1_words <- target_types %>%
  filter(tbin == "t1") %>% # t1 only
  left_join(groups_info %>%
              mutate(target_child_id = as.numeric(target_child_id))) %>% # merge in group info
  group_by(delta_resid_group, gloss) %>% # one row per gloss within group
  summarize(n = n()) %>% # gloss counts by group
  arrange(delta_resid_group, -n) %>% # highest counts first within each group
  group_by(delta_resid_group) %>%
  slice(seq_len(TOPNWORDS_PER_GROUP)) # keep at most the top N rows per group

# Switchboard trigram table, converted to a data.table so that it can be
# filtered with bare column names inside `[`.
sw_trigrams <- data.table(read_csv("switchboard_trigrams.csv"))

# Character vectors holding each group's t1 vocabulary (glosses only).
#
# `t1_words` is still grouped by delta_resid_group, and select() on a grouped
# tibble keeps the grouping column. The previous select(gloss) + unlist()
# therefore also appended the group labels ("high" / "low") to the vocabulary.
# Ungrouping and pulling the single column returns the glosses alone.
t1_words_high <- t1_words %>%
  ungroup() %>%
  filter(delta_resid_group == "high") %>%
  pull(gloss)

t1_words_low <- t1_words %>%
  ungroup() %>%
  filter(delta_resid_group == "low") %>%
  pull(gloss)

# For each group, keep the Switchboard trigrams whose three words all belong
# to that group's t1 vocabulary and tag the rows with the group label.
low_trigrams <- sw_trigrams[
  w3 %in% t1_words_low & w2 %in% t1_words_low & w1 %in% t1_words_low
] %>%
  mutate(vocab_group = "low")

high_trigrams <- sw_trigrams[
  w3 %in% t1_words_high & w2 %in% t1_words_high & w1 %in% t1_words_high
] %>%
  mutate(vocab_group = "high")

# Stack both groups (high rows first) for the group-level comparison.
all_trigrams <- bind_rows(high_trigrams, low_trigrams)

 # Bar plot of the mean log trigram frequency per vocabulary group; the
 # mean / ci_lower / ci_upper columns come from multi_boot_standard().
 all_trigrams %>%
  mutate(log_freq = log(freq)) %>%
  group_by(vocab_group) %>%
  multi_boot_standard(col = "log_freq", na.rm = TRUE) %>%
  ggplot(aes(x = vocab_group,
             y = mean,
             fill = vocab_group,
             group = vocab_group)) +
  geom_col(position = "dodge") +
  geom_linerange(aes(ymax = ci_upper, ymin = ci_lower),
                 position = position_dodge(width = 0.9)) +
  theme_classic()

  #summarize(mean_freq= mean(log_freq),
  #          ci_lower_freq = mean(log_freq) - (1.96 * sd(log_freq)),
  #          ci_upper_freq = mean(log_freq) + (1.96 * sd(log_freq)))
  

 # Same group comparison on the raw (unlogged) frequencies, shown as a table.
 all_trigrams %>%
  group_by(vocab_group) %>%
  multi_boot_standard(
    col = "freq",
    na.rm = TRUE
  ) %>%
  kable()

vocab_group	ci_lower	ci_upper	mean
high	4.069088	4.361839	4.208633
low	2.027058	2.460856	2.219358

Individual Kids

# Per-child t1 word types restricted to glosses that appear in `t1_words`
# (either group's list), with the child's group information attached.
good_types_t1 <- target_types %>%
  filter(tbin == "t1",
         gloss %in% t1_words$gloss) %>%
  left_join(
    groups_info %>%
      mutate(target_child_id = as.numeric(target_child_id))
  )

# Mean log frequency of the trigrams that can be built from one child's words.
#
# df:           data frame with a `gloss` column (the child's word types).
# all_trigrams: table with word columns `w1`, `w2`, `w3` and a `freq` column.
#               Columns are accessed with `$`, so a data.table, tibble or
#               plain data.frame all work.
# Returns a single number: mean(log(freq)) over the trigrams whose three words
# all occur in `df$gloss`; NaN when no trigram qualifies.
get_trigrams_by_kid <- function(df, all_trigrams){
  known_words <- unique(df$gloss)

  # A trigram counts only if every one of its words is known to the child.
  is_covered <- all_trigrams$w1 %in% known_words &
    all_trigrams$w2 %in% known_words &
    all_trigrams$w3 %in% known_words

  mean(log(all_trigrams$freq[is_covered]))
}


# One row per child and time bin: the mean log Switchboard frequency of the
# trigrams that can be built from that child's t1 word types, together with
# the child's delta_resid_group.
trigram_freqs_by_kid <- good_types_t1 %>%
  nest(-target_child_id, -tbin) %>%
  # get_trigrams_by_kid() returns a single number, so map_dbl() yields a plain
  # numeric column directly and no list column has to be unnested afterwards.
  mutate(log_mean_trigram_frequency =
           map_dbl(data, get_trigrams_by_kid, sw_trigrams)) %>%
  select(-data) %>%
  # Coerce the key the same way as the other joins against groups_info in this
  # file, and name the key explicitly instead of relying on a natural join.
  left_join(groups_info %>%
              mutate(target_child_id = as.numeric(target_child_id)) %>%
              select(target_child_id, delta_resid_group),
            by = "target_child_id")


# Overlaid per-group densities of the children's mean log trigram frequency.
trigram_freqs_by_kid %>%
  ggplot(aes(x = log_mean_trigram_frequency,
             group = delta_resid_group,
             fill = delta_resid_group)) +
  geom_density(alpha = .4) +
  theme_classic()

# Group means of the per-child values, drawn as bars with the ci_lower /
# ci_upper range returned by multi_boot_standard().
trigram_freqs_by_kid %>%
  group_by(delta_resid_group) %>%
  multi_boot_standard(col = "log_mean_trigram_frequency", na.rm = TRUE) %>%
  ggplot(aes(x = delta_resid_group,
             y = mean,
             fill = delta_resid_group,
             group = delta_resid_group)) +
  geom_col(position = "dodge") +
  geom_linerange(aes(ymax = ci_upper, ymin = ci_lower),
                 position = position_dodge(width = 0.9)) +
  theme_classic()

MTLD words by usefulness

defined as adult trigrams from switchboard

2018-05-02

Group level

Individual Kids