library(knitr)

# Global knitr chunk options: show code, hide messages and warnings, stop on
# errors, and disable caching and code tidying.
# Use TRUE/FALSE rather than T/F: T and F are ordinary variables that can be
# reassigned, whereas TRUE and FALSE are reserved words.
opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE,
               error = FALSE, cache = FALSE, tidy = FALSE)

library(tidyverse)
library(data.table)
library(broom)
library(wordbankr)

# Make the classic ggplot2 theme with a 10 pt base font the default for plots.
theme_set(theme_classic(base_size = 10))

Get normative AoA (age of acquisition) from Wordbank

# Download the American English "WS" (Words & Sentences) instrument data from
# Wordbank, with administration-level and item-level metadata attached.
eng_ws_data <- get_instrument_data(language = "English (American)",
                                   form = "WS",
                                   administrations = TRUE,
                                   iteminfo = TRUE)

# Fit a normative age of acquisition (AoA) for every item.
# The argument is spelled out in full (`proportion`, not the partially matched
# `prop`) so the call does not depend on R's partial argument matching.
# NOTE(review): per the wordbankr docs, `proportion` is the threshold
# proportion of children used to define an item's AoA; the original author
# noted that it also affects how many items end up with a missing AoA.
aos <- fit_aoa(eng_ws_data, proportion = .2)

# Turn the normative AoA estimates into a pseudo-child called "NORMATIVE":
# items are binned into 12 equal-sized AoA rank groups, stored in
# `session_num` so the result has the same columns as the per-child data.
normative_aoa <- aos %>%
  mutate(child_id = "NORMATIVE",
         session_num = ntile(aoa, 12)) %>%
  arrange(session_num) %>%
  select(child_id, num_item_id, session_num) %>%
  data.table()

Get per-child rank AoA

# Load the cached word-production records and keep only the columns used below.
produced_words <- select(
  read_csv("../12_cognitive_measures/produced_words_cache.csv"),
  child_id, item, num_item_id, session_num
)

# IDs of the children whose highest recorded session number is exactly 12.
twelve_session_kids <- produced_words %>%
  group_by(child_id) %>%
  filter(session_num == max(session_num),
         session_num == 12) %>%
  distinct(child_id) %>%
  pull(child_id)
 
# For each twelve-session child, keep the first (lowest session_num) record of
# every word, i.e. the session in which the word first appears.
# The child filter runs first so the grouped slice only processes the children
# that are kept; the resulting rows are the same as filtering afterwards.
word_rank <- produced_words %>%
  filter(child_id %in% twelve_session_kids) %>%
  group_by(child_id, num_item_id) %>%
  arrange(session_num) %>%  # arrange() ignores groups: sorts the whole table
  slice(1) %>%              # first row per child x item = earliest session
  ungroup() %>%             # drop grouping before converting to data.table
  as.data.table()

# combine normative and kid data
word_rank_with_normative  <- normative_aoa %>%
  right_join(distinct(word_rank, num_item_id, item), by = "num_item_id") %>% # join in item to normative
  bind_rows(word_rank) %>%
  mutate_if(is.character, as.factor)  %>%
  data.table()

Get kid-kid and normative-kid correlations

# Every unordered pair of IDs (all children plus the "NORMATIVE" pseudo-child),
# one pair per row.
child_combos <- c(unique(word_rank$child_id), "NORMATIVE") %>%
  combn(m = 2) %>%   # 2 x n_pairs matrix
  t() %>%            # transpose so each row is one pair
  data.table() %>%
  stats::setNames(c("child_id1", "child_id2"))

# Spearman correlation between the session numbers of two children over the
# items both of them have.
#
# id1, id2: child_id values to compare.
# df:       data.table with columns child_id, num_item_id, item, session_num.
#
# Returns the tidied cor.test() result as a one-row data frame, with the two
# IDs added as child_id1 and child_id2.
get_pairwise_correlation <- function(id1, id2, df){
  first_child <- df[child_id == id1]
  second_child <- df[child_id == id2]

  # Inner join: only items present for both children are compared.
  shared_items <- merge(first_child, second_child,
                        by = c("num_item_id", "item"),
                        all = FALSE)

  spearman_fit <- cor.test(shared_items$session_num.x,
                           shared_items$session_num.y,
                           method = "spearman")

  mutate(tidy(spearman_fit),
         child_id1 = id1,
         child_id2 = id2)
}

# Run the pairwise correlation for every ID pair and keep the IDs, the
# correlation estimate and its p-value.
kid_corrs <- map2_df(
  .x = child_combos$child_id1,
  .y = child_combos$child_id2,
  .f = get_pairwise_correlation,
  word_rank_with_normative
) %>%
  select(child_id1, child_id2, estimate, p.value)

Child-child and child-normative correlations

# Split the pairwise correlations into kid-kid pairs and normative-kid pairs.

# A kid-kid pair has NEITHER member equal to "NORMATIVE", so the two
# conditions must be combined with `&`. With `|` the filter is true for every
# pair (no pair is NORMATIVE on both sides), which labelled normative pairs as
# "kid_kid" and duplicated them in all_corrs.
child_child_corrs <- kid_corrs %>%
  filter(child_id1 != "NORMATIVE" & child_id2 != "NORMATIVE") %>%
  mutate(type = "kid_kid")

# A normative-kid pair has "NORMATIVE" on either side.
normative_corrs <- kid_corrs %>%
  filter(child_id1 == "NORMATIVE" | child_id2 == "NORMATIVE") %>%
  mutate(type = "normative_kid")

# The two subsets are disjoint, so each pair appears exactly once.
all_corrs <- bind_rows(child_child_corrs, normative_corrs)

# Distribution of correlation estimates by pair type, as a histogram.
ggplot(data = all_corrs,
       mapping = aes(x = estimate, fill = type)) +
  geom_histogram() +
  theme_classic()

# The same distributions as overlaid, semi-transparent density curves.
ggplot(data = all_corrs,
       mapping = aes(x = estimate, fill = type)) +
  geom_density(alpha = .3) +
  theme_classic()