# Load the pairwise word-frequency correlation table; the path is built with
# here().
CORR_INFILE <- here("analyses/07_frequency_control/data/lang_pairwise_word_freq_corrs_10.csv")
corr_infile <- read_csv(CORR_INFILE)

# Keep only the rows whose `method` is Spearman's rank correlation.
spearman_cor <- corr_infile %>%
  filter(method == "Spearman's rank correlation rho")

# Compute the mean once as a plain scalar. Passing it to geom_vline() as a
# fixed parameter (instead of `aes(xintercept = mean(spearman_cor$estimate))`)
# avoids the discouraged `$` inside aes() and draws a single reference line
# rather than one overlapping line per row of the layer data. `na.rm = TRUE`
# keeps the line visible even if some estimates are missing.
mean_spearman_rho <- mean(spearman_cor$estimate, na.rm = TRUE)

# Histogram of the Spearman estimates with a dashed red line at their mean.
# `bins = 30` is the value stat_bin() would pick by default; stating it
# explicitly keeps the same binning and silences the "using `bins = 30`" message.
ggplot(spearman_cor, aes(x = estimate)) +
  geom_histogram(bins = 30, alpha = .9) +
  xlab("Cross-linguistic word frequency correlation (rho)") +
  geom_vline(xintercept = mean_spearman_rho, linetype = 2, color = "red") +
  # xlim(0, 1) +  # optional: force the x-axis to span the full 0-1 range
  theme_classic()
The mean Spearman correlation across language pairs is 0.8379195, computed only over words that occur in more than 10 essays in both languages.