# Path to the per-cluster pairwise correlation file, built with here().
ETS_CLUSTER_CORR_PATH <- here(
  "analyses/09_clustering_control/data/ets/lang_pairwise_semantics_correlations_ets_by_cluster_ward.csv"
)

# Column names are supplied explicitly, so the first line of the file is read
# as data -- presumably the CSV has no header row (TODO confirm).
# Every row is tagged with the corpus label that the plot facets on.
cluster_ets <- mutate(
  read_csv(
    ETS_CLUSTER_CORR_PATH,
    col_names = c("cluster1", "cluster2", "cor", "lang1", "lang2")
  ),
  corpus = "TOEFL"
)


# Mean correlation for every (corpus, cluster1, cluster2) combination,
# ignoring missing correlations.
# summarize() only peels off the last grouping variable, so ungroup()
# explicitly to keep the leftover (corpus, cluster1) grouping from leaking
# into the steps below.
cluster_pair_means <- cluster_ets %>%
  group_by(corpus, cluster1, cluster2) %>%
  summarize(cor = mean(cor, na.rm = TRUE)) %>%
  ungroup()

# Make the pair table symmetric: append a copy with cluster1 and cluster2
# swapped, so every cluster appears in the cluster1 column for each pair it
# takes part in.
# - stringsAsFactors = FALSE keeps `corpus` a character column on R < 4.0,
#   where data.frame() would otherwise turn it into a factor and bind_rows()
#   would have to coerce it back.
# - `same` is "Local" when a cluster is paired with itself, "Global" otherwise.
# - distinct() removes the exact duplicate rows that mirroring creates for
#   the within-cluster (cluster1 == cluster2) pairs.
full_cluster_pair_means <- cluster_pair_means %>%
  bind_rows(data.frame(corpus = cluster_pair_means$corpus,
                       cluster2 = cluster_pair_means$cluster1,
                       cluster1 = cluster_pair_means$cluster2,
                       cor = cluster_pair_means$cor,
                       stringsAsFactors = FALSE)) %>%
  mutate(same = case_when(cluster1 == cluster2 ~ "Local", TRUE ~ "Global")) %>%
  distinct()

# Annotation table: corpus, comparison type, and an x / y position per label.
# NOTE(review): not referenced by any code visible in this section -- confirm
# it is still needed.
label_df <- data.frame(
  corpus = rep("TOEFL", 2),
  same = c("Global", "Local"),
  ypos = c(.2, .4),
  x = rep(10, 2)
)

# Summarise `cor` per corpus, cluster and comparison type with
# multi_boot_standard(); the code below reads `mean`, `ci_lower` and
# `ci_upper` from its result (presumably a bootstrapped mean and confidence
# bounds -- confirm against the helper's documentation).
df <- group_by(full_cluster_pair_means, corpus, cluster1, same) %>%
  multi_boot_standard(col = "cor") %>%
  ungroup()

# Reverse the level order of the comparison factor.
df$same <- fct_rev(df$same)

# Same summary applied to the per-cluster means, per corpus and comparison type.
# NOTE(review): not referenced by any code visible in this section.
overall_means <- multi_boot_standard(group_by(df, corpus, same), col = "mean")

# One row per corpus/cluster, with one column of means per comparison type;
# the plot reads the `Global` and `Local` columns for its connecting segments.
df_segment <- spread(
  select(df, corpus, cluster1, same, mean),
  key = same,
  value = mean
)

# Legend title shared by the shape, fill and colour scales.
scale_label <- "Comparison"

# Uncomment the pdf() / dev.off() pair to write the figure to a file.
#pdf("figs/local_global_plot.pdf", width = 10, height  = 4.4)
ggplot() +
  # Vertical connector between each cluster's Global and Local mean.
  geom_segment(
    data = df_segment,
    aes(x = cluster1, xend = cluster1, y = Global, yend = Local),
    linetype = 1,
    size = .6
  ) +
  # Interval bars (ci_lower to ci_upper) are drawn for the Global rows only.
  geom_linerange(
    data = df %>% filter(same == "Global"),
    aes(x = cluster1, ymin = ci_lower, ymax = ci_upper),
    colour = "#377EB8",
    size = 1.5
  ) +
  # Point per cluster and comparison type; colour and shape encode the type.
  geom_point(
    data = df,
    aes(x = cluster1, y = mean, colour = same, shape = same),
    size = 5
  ) +
  facet_wrap(~corpus) +
  ggtitle("TOEFL- Ward Distance Clusters") +
  ylab("Cross-linguistic\nWord Distance Correlation") +
  scale_x_continuous(name = "Cluster", breaks = 1:10) +
  scale_shape_manual(scale_label, values = c(19, 15)) +
  scale_fill_manual(scale_label, values = c("#E41A1C", "#377EB8")) +
  scale_color_manual(scale_label, values = c("#E41A1C", "#377EB8")) +
  theme_classic(base_size = 20) +
  theme(
    axis.line = element_line(size = 1.2),
    axis.ticks = element_line(size = 1),
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 10),
    legend.background = element_rect(linetype = 1, size = 0.5, colour = 1)
  )

#dev.off()