Local-Global difference for Wikipedia, by language
# Load the local/global concreteness table and keep only rows whose
# `corpus` column equals "wiki".
INFILE <- here("analyses/02_concreteness_semantics/data/local_global_conc.csv")
conc_in_out_wiki1 <- filter(read_csv(INFILE), corpus == "wiki")

# Mirror copy of the same rows with the `lang1` / `lang2` column names swapped.
conc_in_out_wiki2 <- rename(conc_in_out_wiki1, lang1 = lang2, lang2 = lang1)

# Stack the mirrored rows on top of the originals so that every pair appears
# in both orientations (presumably so that grouping on `lang1` alone covers
# each language's full set of pairs -- confirm against the input data).
conc_in_out_wiki <- bind_rows(conc_in_out_wiki2, conc_in_out_wiki1)
# Average `dif` per language: group on `lang1` (exposed under the name `lang`)
# and take the mean within each group. One output row per language, with
# columns `lang` and `mean_dif`.
mean_local_global_by_lang <- conc_in_out_wiki %>%
  group_by(lang = lang1) %>%
  summarize(mean_dif = mean(dif))
# Per-wiki statistics read from a tab-separated file. After clean_names()
# tidies the header, only the columns needed downstream are kept, and `wiki`
# is exposed as `lang` so it shares a key name with the per-language means.
WIKI_ARTICLES <- here("analyses/10_wikipedia_stats/data/raw/num_articles.tsv")
# Assigned with `<-` (was `=`) to match every other assignment in this file.
wiki_article_stats <- read_tsv(WIKI_ARTICLES) %>%
  clean_names() %>%
  select(lang = wiki, articles, active_users, admins, depth)
# Attach the Wikipedia statistics to each language's mean difference.
# A left join keeps every language from the means table; languages with no
# matching statistics row get NA in the joined columns.
# The join key is stated explicitly: `lang` is the only column the two tables
# share, so the result is unchanged, but the join no longer depends on (or
# prints a message about) whichever column names happen to overlap.
all_df <- mean_local_global_by_lang %>%
  left_join(wiki_article_stats, by = "lang") %>%
  # Natural log of the article count, used as the x variable below.
  mutate(log_articles = log(articles))
# Scatterplot of mean difference (y) against log article count (x), one point
# per language, overlaid with a linear-model fit.
all_df %>%
  ggplot(aes(log_articles, mean_dif)) +
  geom_point() +
  geom_smooth(method = "lm")
# Two-sided Pearson correlation test between mean difference and log article
# count. The method and alternative are the function defaults, spelled out
# here for the reader.
cor.test(
  x = all_df$mean_dif,
  y = all_df$log_articles,
  alternative = "two.sided",
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: all_df$mean_dif and all_df$log_articles
## t = 1.8601, df = 32, p-value = 0.07208
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.02883653 0.58838697
## sample estimates:
## cor
## 0.3123753
# Same axes as the scatterplot, but each language is drawn as a text label
# showing its `lang` value instead of a point.
all_df %>%
  ggplot(aes(log_articles, mean_dif)) +
  geom_label(aes(label = lang))