# Input CSVs of pairwise tile correlations, one file per corpus.
WIKI_PATH <- "wiki/data/lang_pairwise_tile_correlations.csv"
ETS_PATH <- "ets/data/lang_pairwise_tile_correlations_ets_decile.csv"

# Language codes whose rows are dropped from the Wikipedia data below.
BAD_LANGS <- c("ig", "vi", "yo", "th", "ne", "ur")

# Wikipedia correlations. Column names are supplied explicitly, so every line
# of the file is read as data. A row is discarded when either language of the
# pair is listed in BAD_LANGS.
conc_corr_wiki <- WIKI_PATH %>%
  read_csv(col_names = c("tile1", "tile2", "corr", "lang1", "lang2")) %>%
  filter(!(lang1 %in% BAD_LANGS | lang2 %in% BAD_LANGS)) %>%
  mutate(corpus = "wiki")

# ETS correlations, same column layout. No language filter is applied here.
conc_corr_ets <- ETS_PATH %>%
  read_csv(col_names = c("tile1", "tile2", "corr", "lang1", "lang2")) %>%
  mutate(corpus = "ets")

# Stack both corpora; the `corpus` column records where each row came from.
conc_corr <- conc_corr_wiki %>%
  bind_rows(conc_corr_ets)

# Across deciles

# Summarise `corr` for every (tile1, tile2, corpus) cell with
# multi_boot_standard(), then keep only the cells where the two deciles
# differ (tile1 > tile2). The plots below read the `mean`, `ci_lower` and
# `ci_upper` columns of this summary.
# tile1 is turned into a factor last, after the numeric comparison, so it can
# serve as a discrete colour/group variable.
conc_corr_ms_diff <- conc_corr %>%
  group_by(tile1, tile2, corpus) %>%
  multi_boot_standard(col = "corr") %>%
  ungroup() %>%
  filter(tile1 > tile2) %>%
  mutate(tile1 = as.factor(tile1))

# Line plot of the mean correlation for pairs of different deciles:
# tile2 on the x axis, one coloured line per tile1, one panel per corpus.
# NOTE(review): the x axis shows tile2 but is labelled "Decile 1", and the
# colour legend shows tile1 but is titled "Decile 2" -- confirm this naming
# is intended before changing the labels.
ggplot(conc_corr_ms_diff,
       aes(x = tile2, y = mean, color = tile1, group = tile1)) +
  facet_wrap(.~ corpus) +
  geom_line() +
  # Interval from ci_lower to ci_upper around each mean. The upper bound is
  # mapped to `ymax` explicitly instead of the legacy `max` alias.
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper), size = .05) +
  scale_colour_discrete(name = "Decile 2") +
  scale_x_continuous(breaks = 1:10) +
  ggtitle("Mean correlation by decile  (tile2 < tile1)") +
  xlab("Decile 1") +
  ylab("Cross-linguistic word-pairwise correlation\n (Pearson's r)") +
  theme_classic()

# Summary of `corr` for every (tile1, tile2) cell, pooled over both corpora
# (there is no `corpus` grouping here, unlike the per-corpus summaries).
# ungroup() is applied so the result carries no leftover grouping, matching
# the other summaries in this file.
conc_corr_ms <- conc_corr %>%
  group_by(tile1, tile2) %>%
  multi_boot_standard(col = "corr") %>%
  ungroup()

# Heat map of the mean correlation per (tile1, tile2) cell, one panel per
# corpus, shaded from white (low) to red (high). It is drawn from
# conc_corr_ms_diff, so only cells with tile2 < tile1 are filled.
conc_corr_ms_diff %>%
  ggplot(aes(x = tile1, y = tile2, fill = mean)) +
  geom_tile() +
  facet_wrap(.~ corpus) +
  scale_fill_gradient(low = "white", high = "red") +
  theme_classic()

# Within deciles

# Same per-(tile1, tile2, corpus) summary of `corr` via multi_boot_standard(),
# this time keeping only the cells where both deciles are equal
# (tile1 == tile2).
# tile1 becomes a factor after the comparison; tile2 stays numeric.
conc_corr_ms_same <- conc_corr %>%
  group_by(tile1, tile2, corpus) %>%
  multi_boot_standard(col = "corr") %>%
  ungroup() %>%
  filter(tile1 == tile2) %>%
  mutate(tile1 = as.factor(tile1))

# Mean within-decile correlation (tile1 == tile2) plotted against the decile,
# with one colour per corpus and a quadratic least-squares trend per corpus.
ggplot(conc_corr_ms_same,
       aes(x = tile2, y = mean, group = corpus, color = corpus)) +
  # Quadratic fit, no confidence band.
  geom_smooth(method = "lm", formula = y ~ poly(x, 2), se = FALSE) +
  # Linear alternative, kept for reference:
  # geom_smooth(method = "lm", se = FALSE) +
  # Interval from ci_lower to ci_upper around each mean. The upper bound is
  # mapped to `ymax` explicitly instead of the legacy `max` alias.
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper), size = .1) +
  # Colour encodes the corpus in this plot, so the legend is titled "Corpus"
  # (it was previously titled "Decile 2").
  scale_colour_discrete(name = "Corpus") +
  scale_x_continuous(breaks = 1:10) +
  ggtitle("Mean correlation by decile  (tile2 = tile1)") +
  xlab("Decile 1") +
  ylab("Cross-linguistic word-pairwise correlation\n (Pearson's r)") +
  theme_classic()