Local-global difference for Wikipedia, by language

# Project-relative path of the local/global input table.
INFILE <- here(
  "analyses/02_concreteness_semantics/data/local_global_conc.csv"
)

# Read the table and keep only rows whose `corpus` column equals "wiki".
conc_in_out_wiki1 <- filter(read_csv(INFILE), corpus == "wiki")

# Mirror image of the wiki rows: the lang1 and lang2 column names are swapped,
# so each row is also available with the other language in `lang1`.
conc_in_out_wiki2 <- rename(conc_in_out_wiki1, lang1 = lang2, lang2 = lang1)

# Stack the mirrored rows on top of the originals (columns matched by name).
# NOTE(review): presumably the input lists each language pair only once;
# verify, otherwise pairs are double-counted here.
conc_in_out_wiki <- bind_rows(conc_in_out_wiki2, conc_in_out_wiki1)
  
# One row per language: the mean of `dif` over all rows in which that language
# appears as `lang1`. The grouping key is exposed under the name `lang`.
mean_local_global_by_lang <- conc_in_out_wiki %>%
  group_by(lang = lang1) %>%
  summarise(mean_dif = mean(dif))
# Project-relative path of the raw per-wiki statistics table (TSV).
WIKI_ARTICLES <- here("analyses/10_wikipedia_stats/data/raw/num_articles.tsv")

# Read the statistics, normalise the column names with clean_names(), and keep
# the wiki identifier (exposed as `lang` so it can serve as a join key) plus
# the article, active-user, admin and depth columns.
wiki_article_stats <- read_tsv(WIKI_ARTICLES) %>%
  clean_names() %>%
  select(lang = wiki, articles, active_users, admins, depth)
# Attach the per-wiki statistics to the per-language mean differences.
# The join key is spelled out so the join cannot silently pick up additional
# shared columns and no "Joining with `by = ...`" message is emitted.
# Languages without a matching statistics row keep NA in the joined columns.
all_df <- mean_local_global_by_lang %>%
  left_join(wiki_article_stats, by = "lang") %>%
  # Natural log of the article count, used as the predictor downstream.
  mutate(log_articles = log(articles))

# Scatter plot of mean difference against log article count, with a fitted
# linear-model line (and its default confidence band) overlaid.
all_df %>%
  ggplot(aes(log_articles, mean_dif)) +
  geom_point() +
  geom_smooth(method = "lm")

# Pearson correlation test between the per-language mean difference and the
# log article count (two-sided, the cor.test default).
cor.test(
  x = all_df$mean_dif,
  y = all_df$log_articles,
  method = "pearson"
)
## 
##  Pearson's product-moment correlation
## 
## data:  all_df$mean_dif and all_df$log_articles
## t = 1.8601, df = 32, p-value = 0.07208
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.02883653  0.58838697
## sample estimates:
##       cor 
## 0.3123753
# Same axes as the scatter plot, but each language is drawn as a text label
# showing its `lang` value instead of a point.
all_df %>%
  ggplot(aes(log_articles, mean_dif, label = lang)) +
  geom_label()