library(knitr)

# Global knitr chunk defaults: echo the code, suppress messages and warnings,
# and disable caching and source tidying.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned, whereas TRUE and FALSE are reserved words.
opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE,
               error = FALSE, cache = FALSE, tidy = FALSE)

library(tidyverse)
library(langcog)

# Make ggplot2's classic theme with a 10 pt base font the session default.
theme_set(theme_classic(base_size = 10))

# MTLD on average

# Load the per-child lexical diversity measures. Column names are supplied
# explicitly, so read_csv() treats every line of the file (including the
# first) as data.
ld_df <- read_csv(
  "diversity_measures_by_age.csv",
  col_names = c(
    "target_child_name", "corpus_name", "target_child_id",
    "target_child_age", "mtld"
  )
)

# Keep only rows whose MTLD value is finite (drops NA, NaN and +/-Inf).
ld_df <- filter(ld_df, is.finite(mtld))

# Mean MTLD per age bin with confidence intervals and a smoothed trend.
#
# The session-wide default theme is already theme_classic(base_size = 10),
# so no complete theme is added here: adding theme_classic() again would
# reset the base font size to ggplot2's default and discard that setting.
ld_df %>%
  # Restrict to ages below 2000 (in the units of target_child_age;
  # presumably days -- confirm against the data source).
  filter(target_child_age < 2000) %>%
  # Split the remaining age range into 30 equal-width intervals.
  mutate(age_bin = cut(target_child_age,
                       breaks = 30)) %>%
  group_by(age_bin) %>%
  # langcog::multi_boot_standard() supplies the `mean`, `ci_lower` and
  # `ci_upper` columns that the plot layers below rely on.
  multi_boot_standard(col = "mtld") %>%
  # group = 1 puts all bins in one group so the smoother spans the
  # discrete x axis instead of being fitted per bin.
  ggplot(aes(x = age_bin, y = mean, group = 1)) +
  geom_smooth() +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper), size = .2) +
  ggtitle("CHILDES MTLD across development") +
  ylab("MTLD") +
  # Rotate the interval labels so the 30 bin names do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# MTLD by kid

# Children (identified by name within corpus) that have more than one row in
# ld_df. `n` is the number of rows per child across all ages.
longitudinal_kids <- ld_df %>%
  count(target_child_name, corpus_name) %>%
  filter(n > 1)

# Observations below the age cutoff for those children only.
# inner_join() is used rather than right_join(): a right join would also keep
# children from longitudinal_kids that have no observation under the cutoff,
# as rows whose id, age and mtld are all NA, which then show up as an "NA"
# facet in per-child plots. The join keys are given explicitly instead of
# relying on the natural-join guess.
longitudinal_df <- ld_df %>%
  filter(target_child_age < 2000) %>%
  inner_join(longitudinal_kids,
             by = c("target_child_name", "corpus_name"))

# One panel per child id showing that child's MTLD trajectory over age.
#
# The session-wide default theme is already theme_classic(base_size = 10),
# so no complete theme is added here: adding theme_classic() again would
# reset the base font size to ggplot2's default and discard that setting.
ggplot(longitudinal_df,
       aes(x = target_child_age, y = mtld, group = 1)) +
  # Faceting splits the data by child, so group = 1 yields a single line
  # within each panel.
  facet_wrap(~target_child_id, ncol = 8) +
  geom_line(color = "blue") +
  geom_point(size = .4) +
  # Rotate the age labels so they stay legible in the narrow panels.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))