Language-wise correlations

# Load the pairwise language correlations. Column names are passed explicitly,
# so readr treats the first line of the file as data rather than as a header.
corr_df <- read_csv(
  "data/lang_pairwise_correlations_by_syntactic_word_type.csv",
  col_names = c("lang1", "lang2", "test_group", "word_type", "cor")
)

# Summarise `cor` within each (test_group, word_type) cell; the result carries
# the `mean`, `ci_lower` and `ci_upper` columns used by the plot below.
group_means <- multi_boot_standard(
  group_by(corr_df, test_group, word_type),
  col = "cor"
)

# Dodged bars of the cell means with their interval drawn as a line range.
ggplot(
  group_means,
  aes(x = test_group, y = mean, fill = word_type, group = word_type)
) +
  geom_bar(stat = "identity", position = position_dodge()) +
  geom_linerange(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = position_dodge(.9)
  ) +
  labs(x = "Score Group", y = "Mean Language-wise Correlation") +
  theme_classic()

# Render the same summary as a table.
kable(group_means)
test_group word_type ci_lower ci_upper mean
high content 0.2573590 0.2612271 0.2593317
high grammatical 0.4719839 0.4817329 0.4769835
low content 0.2524864 0.2574228 0.2548657
low grammatical 0.4342693 0.4437612 0.4389072
#' Build a square language-by-language correlation matrix.
#'
#' @param data Long-format data with columns `lang1`, `lang2` and `cor`.
#' @return A numeric matrix whose row and column names are the sorted set of
#'   languages found in `lang1` and `lang2`, with 1 on the diagonal.
lang_cor_matrix <- function(data) {
  lang1 <- as.character(data$lang1)
  lang2 <- as.character(data$lang2)
  langs <- sort(unique(c(lang1, lang2)))

  # Naming both dimensions from the same vector guarantees that row i and
  # column i always refer to the same language.
  cor_mat <- matrix(
    NA_real_,
    nrow = length(langs),
    ncol = length(langs),
    dimnames = list(langs, langs)
  )
  cor_mat[cbind(match(lang1, langs), match(lang2, langs))] <- data$cor

  # Correlation is symmetric: when a pair is listed in one direction only,
  # reuse its value for the mirrored cell instead of leaving it NA.
  # NOTE(review): whether the input lists both directions is not visible
  # here; values that are already present are never overwritten.
  mirrored <- t(cor_mat)
  unfilled <- is.na(cor_mat)
  cor_mat[unfilled] <- mirrored[unfilled]

  # A language correlates perfectly with itself.
  diag(cor_mat) <- 1
  cor_mat
}

#' Plot a horizontal dendrogram of languages clustered by correlation profile.
#'
#' Languages are clustered with `hclust()` on the distances (`dist()`) between
#' the rows of the language-by-language correlation matrix.
#'
#' @param data Long-format data with columns `lang1`, `lang2` and `cor`.
#' @param tg Test-group label used in the plot title.
#' @param wt Word-type label used in the plot title.
#' @return The plotted dendrogram, invisibly.
plot_dendro <- function(data, tg, wt) {
  cor_mat <- lang_cor_matrix(data)
  dist_matrix <- dist(cor_mat)

  # paste() already separates its arguments with a single space.
  title <- paste(tg, wt)

  dendro <- as.dendrogram(hclust(dist_matrix))
  plot(dendro, horiz = TRUE, yaxt = "n", main = title)
  invisible(dendro)
}

# Split the correlations into one nested data frame per
# (test_group, word_type) combination.
nested_df <- corr_df %>%
  group_by(test_group, word_type) %>%
  nest()

# Plot the dendrogram for a single (test_group, word_type) cell. The cell is
# looked up by its key values rather than by row position, so the title always
# describes the data that was actually clustered, whatever order nest()
# returned the groups in.
plot_dendro_for <- function(nested, tg, wt) {
  idx <- which(nested$test_group == tg & nested$word_type == wt)
  if (length(idx) != 1L) {
    stop(
      "Expected exactly one nested group for ", tg, " / ", wt,
      ", found ", length(idx),
      call. = FALSE
    )
  }
  plot_dendro(nested$data[[idx]], tg, wt)
}

p1 <- plot_dendro_for(nested_df, "low", "grammatical")

p2 <- plot_dendro_for(nested_df, "high", "grammatical")

p3 <- plot_dendro_for(nested_df, "low", "content")

p4 <- plot_dendro_for(nested_df, "high", "content")

#plot_grid(plot(p1), plot(p2), plot(p3), plot(p4), nrow = 2, labels = c("lg", "gh", "lc", "hc"))