Language-wise correlations
# Load the pairwise language correlations. Column names are supplied
# explicitly rather than taken from the file.
# NOTE(review): with explicit col_names, readr treats the first line of the
# file as data -- confirm the CSV really has no header row.
corr_df <- read_csv(
  file = "data/lang_pairwise_correlations_by_syntactic_word_type.csv",
  col_names = c("lang1", "lang2", "test_group", "word_type", "cor")
)

# Summarise `cor` within every (test_group, word_type) cell; the result
# carries the columns mean, ci_lower and ci_upper used further down.
group_means <- multi_boot_standard(
  group_by(corr_df, test_group, word_type),
  col = "cor"
)
# Dodged bar chart of the mean correlation per score group, one bar per word
# type, with a vertical line spanning ci_lower..ci_upper on each bar.
ggplot(
  data = group_means,
  mapping = aes(x = test_group, y = mean, fill = word_type, group = word_type)
) +
  geom_bar(position = position_dodge(), stat = "identity") +
  geom_linerange(
    mapping = aes(ymin = ci_lower, ymax = ci_upper),
    position = position_dodge(width = 0.9)
  ) +
  labs(x = "Score Group", y = "Mean Language-wise Correlation") +
  theme_classic()

# The same summary rendered as a table.
group_means %>%
  kable()
test_group | word_type | ci_lower | ci_upper | mean |
---|---|---|---|---|
high | content | 0.2573590 | 0.2612271 | 0.2593317 |
high | grammatical | 0.4719839 | 0.4817329 | 0.4769835 |
low | content | 0.2524864 | 0.2574228 | 0.2548657 |
low | grammatical | 0.4342693 | 0.4437612 | 0.4389072 |
# Cluster languages by their pairwise correlations and draw the dendrogram.
#
# Arguments:
#   data  data frame with columns `lang1`, `lang2` (language labels) and
#         `cor` (numeric correlation for that pair), one row per pair.
#   tg    test-group label, used only in the plot title.
#   wt    word-type label, used only in the plot title.
#
# Returns (invisibly) the dendrogram that was plotted, so callers can keep
# or re-plot it.
#
# Each language is represented by its row of the language-by-language
# correlation matrix; rows are compared with dist() (Euclidean by default)
# and clustered with hclust() (complete linkage by default).
plot_dendro <- function(data, tg, wt) {
  lang1 <- as.character(data$lang1)
  lang2 <- as.character(data$lang2)
  langs <- sort(unique(c(lang1, lang2)))

  if (length(langs) < 2L) {
    stop("plot_dendro() needs at least two distinct languages", call. = FALSE)
  }
  # A repeated (lang1, lang2) pair would make the matrix cell ambiguous.
  if (anyDuplicated(data.frame(lang1 = lang1, lang2 = lang2)) > 0L) {
    stop("plot_dendro() found duplicated (lang1, lang2) pairs", call. = FALSE)
  }

  # Square matrix with identical, sorted row and column labels; cells are
  # addressed by name, so the result does not depend on the row order of
  # `data`.
  cor_mat <- matrix(
    NA_real_,
    nrow = length(langs),
    ncol = length(langs),
    dimnames = list(langs, langs)
  )
  cor_mat[cbind(lang1, lang2)] <- data$cor

  # Correlation is symmetric: when only one direction of a pair is stored,
  # mirror it instead of leaving half of the matrix NA. Cells that are
  # already present are left untouched.
  absent <- is.na(cor_mat)
  cor_mat[absent] <- t(cor_mat)[absent]

  # A language correlates perfectly with itself.
  diag(cor_mat) <- 1

  dendro <- as.dendrogram(hclust(dist(cor_mat)))
  plot(dendro, horiz = TRUE, yaxt = "n", main = paste(tg, wt))
  invisible(dendro)
}
# One row per (test_group, word_type) combination; the remaining columns of
# corr_df are packed into the list-column `data`.
nested_df <- corr_df %>%
  group_by(test_group, word_type) %>%
  nest()

# Draw the dendrogram for one group, looking the group up by its keys.
# Picking groups by row position is fragile: the row order produced by
# nest() is not guaranteed to match hard-coded labels, which would put the
# wrong title on a plot.
plot_group_dendro <- function(tg, wt) {
  hit <- which(nested_df$test_group == tg & nested_df$word_type == wt)
  if (length(hit) != 1L) {
    stop(
      "expected exactly one group for test_group = '", tg,
      "', word_type = '", wt, "'; found ", length(hit),
      call. = FALSE
    )
  }
  plot_dendro(nested_df$data[[hit]], tg, wt)
}

p1 <- plot_group_dendro("low", "grammatical")
p2 <- plot_group_dendro("high", "grammatical")
p3 <- plot_group_dendro("low", "content")
p4 <- plot_group_dendro("high", "content")
#plot_grid(plot(p1), plot(p2), plot(p3), plot(p4), nrow = 2, labels = c("lg", "gh", "lc", "hc"))