# Directory holding the cluster correlation files that are read below.
ETS_CLUSTER_CORR_PATH <- here(
  "analyses/11_cluster_control/data/mean_cluster_corrs_native/"
)

# Read every file in the directory and stack the rows into one data frame.
# Column names are supplied explicitly, so the first line of each file is
# parsed as data rather than as a header. Rows whose correlation is missing
# are dropped.
cluster_ets <- list.files(ETS_CLUSTER_CORR_PATH, full.names = TRUE) %>%
  map_df(
    ~ read_csv(
      .x,
      col_names = c("cluster", "same", "nclusters", "cor", "lang1", "lang2")
    )
  ) %>%
  filter(!is.na(cor))
# Aggregate within language pairs: one mean correlation per combination of
# number of clusters, language pair, and comparison type (`same`).
# NOTE: summarize() only peels off the last grouping variable, so the result
# is still grouped by nclusters, lang1 and lang2.
language_pair_means_ets <- cluster_ets %>%
  group_by(nclusters, lang1, lang2, same) %>%
  summarize(cor = mean(cor, na.rm = TRUE)) %>%
  mutate(corpus = "ets")
# ETS
# Summarize the language-pair means for each comparison type (`same`) at each
# number of clusters.
# NOTE(review): multi_boot_standard is defined outside this file; presumably a
# bootstrap helper returning `mean`, `ci_lower` and `ci_upper` (the columns
# used further down) -- confirm. If it resamples, results vary between runs
# unless a seed is set beforehand.
ets_ribbon_data <- language_pair_means_ets %>%
  group_by(same, nclusters) %>%
  multi_boot_standard(col = "cor", na.rm = TRUE)
# Attach the ribbon bounds to every row: for each number of clusters,
# `mean_max` is the mean of the "local" comparison and `mean_min` is the mean
# of the "global" comparison. map_dbl() requires exactly one "local" and one
# "global" row per nclusters value and errors otherwise.
ets_ribbon_data_for_plotting <- ets_ribbon_data %>%
  group_by(nclusters) %>%
  nest() %>%
  mutate(
    mean_max = map_dbl(data, ~ filter(.x, same == "local") %>% pull(mean)),
    mean_min = map_dbl(data, ~ filter(.x, same == "global") %>% pull(mean))
  ) %>%
  # Name the list-column explicitly; a bare unnest() is deprecated.
  unnest(data) %>%
  ungroup()
# Plot the mean correlation against the number of clusters (log10 x axis),
# one line per comparison type, with point ranges spanning ci_lower..ci_upper
# and a faint ribbon filling the area between the "global" and "local" means.
ggplot(
  ets_ribbon_data_for_plotting,
  aes(x = nclusters, y = mean, color = same)
) +
  geom_ribbon(
    aes(x = nclusters, ymin = mean_min, ymax = mean_max),
    alpha = .1,
    color = "white"
  ) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  geom_line(size = 2) +
  xlab("Number of Clusters (log)") +
  ylab("Cross-linguistic\nWord Distance Correlation") +
  scale_x_log10() +
  # annotation_logticks() +
  # Colors are assigned in the level order of `same`.
  scale_color_manual("Comparison", values = c("#377EB8", "#E41A1C")) +
  # facet_wrap(~corpus) +
  theme_classic(base_size = 20) +
  theme(
    axis.line = element_line(size = 1.2),
    axis.ticks = element_line(size = 1),
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 10),
    legend.background = element_rect(linetype = 1, size = 0.5, colour = 1)
  )
