# Locations of the pairwise decile ("tile") correlation tables, one per corpus.
WIKI_PATH <- "wiki/data/lang_pairwise_tile_correlations.csv"
ETS_PATH <- "ets/data/lang_pairwise_tile_correlations_ets_decile.csv"

# Language codes excluded from the Wikipedia table (the ETS table is not
# filtered).
BAD_LANGS <- c("ig", "vi", "yo", "th", "ne", "ur")

# Wikipedia correlations. Column names are supplied explicitly, so the first
# line of the file is read as data (presumably the CSV has no header row -
# confirm). Rows where either language is in BAD_LANGS are dropped.
conc_corr_wiki <- WIKI_PATH %>%
  read_csv(col_names = c("tile1", "tile2", "corr", "lang1", "lang2")) %>%
  filter(!(lang1 %in% BAD_LANGS | lang2 %in% BAD_LANGS)) %>%
  mutate(corpus = "wiki")

# ETS correlations, read with the same column layout and tagged by corpus.
conc_corr_ets <- ETS_PATH %>%
  read_csv(col_names = c("tile1", "tile2", "corr", "lang1", "lang2")) %>%
  mutate(corpus = "ets")

# Both corpora stacked into one long table, distinguished by `corpus`.
conc_corr <- bind_rows(conc_corr_wiki, conc_corr_ets)
Across deciles
# Summarise `corr` for every (tile1, tile2, corpus) cell, then keep only the
# cross-decile cells where tile2 is strictly below tile1.
# multi_boot_standard() supplies the `mean`, `ci_lower` and `ci_upper` columns
# used by the plot (presumably bootstrapped confidence intervals - confirm
# against the helper).
conc_corr_ms_diff <- conc_corr %>%
  group_by(tile1, tile2, corpus) %>%
  multi_boot_standard(col = "corr") %>%
  ungroup() %>%
  filter(tile2 < tile1) %>%
  # tile1 becomes a factor so it maps onto a discrete colour scale.
  mutate(tile1 = as.factor(tile1))

# One line per tile1 level with tile2 on the x axis, one facet per corpus.
# NOTE(review): the x axis shows tile2 but is labelled "Decile 1", and the
# colour legend shows tile1 but is titled "Decile 2" - confirm this naming is
# intended.
ggplot(
  conc_corr_ms_diff,
  aes(x = tile2, y = mean, color = tile1, group = tile1)
) +
  scale_colour_discrete(name = "Decile 2") +
  facet_wrap(. ~ corpus) +
  geom_line() +
  ylab("Cross-linguistic word-pairwise correlation\n (Pearson's r)") +
  # Use the explicit `ymax` aesthetic (paired with `ymin`) rather than the
  # base-graphics style alias `max`.
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper), size = .05) +
  scale_x_continuous(breaks = 1:10) +
  ggtitle("Mean correlation by decile (tile2 < tile1)") +
  xlab("Decile 1") +
  theme_classic()

# Summarise `corr` for every (tile1, tile2) cell, pooling both corpora.
# ungroup() is applied so the result does not carry leftover grouping into
# later steps, matching the other summaries in this file.
# NOTE(review): the heatmap below does not use this table; it reads
# conc_corr_ms_diff instead - confirm whether conc_corr_ms is still needed.
conc_corr_ms <- conc_corr %>%
  group_by(tile1, tile2) %>%
  multi_boot_standard(col = "corr") %>%
  ungroup()

# Heatmap of the mean correlation per (tile1, tile2) cell, one facet per
# corpus, shaded from white (low) to red (high).
ggplot(conc_corr_ms_diff, aes(x = tile1, y = tile2, fill = mean)) +
  facet_wrap(. ~ corpus) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "red") +
  theme_classic()

Within deciles
# Summarise `corr` for every (tile1, tile2, corpus) cell, then keep only the
# within-decile cells where both tiles are the same.
# multi_boot_standard() supplies the `mean`, `ci_lower` and `ci_upper` columns
# used by the plot (presumably bootstrapped confidence intervals - confirm
# against the helper).
conc_corr_ms_same <- conc_corr %>%
  group_by(tile1, tile2, corpus) %>%
  multi_boot_standard(col = "corr") %>%
  ungroup() %>%
  filter(tile2 == tile1) %>%
  mutate(tile1 = as.factor(tile1))

# Within-decile mean correlation by decile, one colour per corpus, with a
# quadratic least-squares trend fitted separately for each corpus.
ggplot(
  conc_corr_ms_same,
  aes(x = tile2, y = mean, group = corpus, color = corpus)
) +
  # Colour encodes the corpus, so the legend is titled accordingly.
  scale_colour_discrete(name = "Corpus") +
  geom_smooth(method = "lm", formula = y ~ poly(x, 2), se = FALSE) +
  # Linear alternative: geom_smooth(method = "lm", se = FALSE) +
  ylab("Cross-linguistic word-pairwise correlation\n (Pearson's r)") +
  # Use the explicit `ymax` aesthetic (paired with `ymin`) rather than the
  # base-graphics style alias `max`.
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper), size = .1) +
  scale_x_continuous(breaks = 1:10) +
  ggtitle("Mean correlation by decile (tile2 = tile1)") +
  xlab("Decile 1") +
  theme_classic()
