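Embedding-based gender scores for occupations are correlated with human gender ratings of those occupations, but the correlation is no stronger when the embeddings and the ratings come from the same language than when they come from different languages.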
Embedding scores:
# Load the per-language embedding gender scores for occupations.
# Column names are supplied explicitly, so every line of the CSV is read
# as a data row.
EMBEDDING_SCORE_PATH <- "embedding_gender_bias_occupations.csv"
embedding_scores <- EMBEDDING_SCORE_PATH %>%
  read_csv(
    col_names = c(
      "wiki_lang_code", "occupation", "female_targets",
      "male_targets", "male_score"
    )
  ) %>%
  # Drop rows whose occupation field is one of the target-set labels
  # (presumably non-occupation bookkeeping rows in the file; confirm).
  filter(!occupation %in% c("male_targets", "female_targets"))
# Tabulate how many embedding-score rows exist for each language code.
embedding_scores %>%
  count(wiki_lang_code) %>%
  kable()
| wiki_lang_code | n |
|---|---|
| de | 391 |
| en | 418 |
| fr | 409 |
| it | 408 |
| no | 306 |
# Histogram of the embedding male score, one panel per language.
ggplot(
  data = embedding_scores,
  mapping = aes(x = male_score, fill = wiki_lang_code)
) +
  geom_histogram() +
  facet_wrap(~wiki_lang_code) +
  theme_classic() +
  # The fill colour only repeats the facet label, so the legend is hidden.
  theme(legend.position = "none")
Human ratings:
# Load the human occupation gender ratings and key them by the same
# language codes used in the embedding scores.
HUMAN_RATINGS_PATH <- "mirsesky_norms_clean.csv"
human_ratings <- HUMAN_RATINGS_PATH %>%
  read_csv() %>%
  # Czech and Slovak ratings are excluded; the embedding table above only
  # lists de, en, fr, it and no.
  filter(!language %in% c("czech", "slovak")) %>%
  mutate(
    # Map the spelled-out language name onto its two-letter code; any
    # language not listed here becomes NA.
    wiki_lang_code = case_when(
      language == "english" ~ "en",
      language == "norwegian" ~ "no",
      language == "french" ~ "fr",
      language == "german" ~ "de",
      language == "italian" ~ "it"
    )
  ) %>%
  select(-language)
# Tabulate how many human-rating rows exist for each language code.
human_ratings %>%
  count(wiki_lang_code) %>%
  kable()
| wiki_lang_code | n |
|---|---|
| de | 422 |
| en | 422 |
| fr | 422 |
| it | 422 |
| no | 422 |
# Histogram of the mean human gender rating, one panel per language.
ggplot(
  data = human_ratings,
  mapping = aes(x = mean_gender_rating, fill = wiki_lang_code)
) +
  geom_histogram() +
  facet_wrap(~wiki_lang_code) +
  theme_classic() +
  # The fill colour only repeats the facet label, so the legend is hidden.
  theme(legend.position = "none")
# Combine embedding scores with human ratings. The target-word columns are
# dropped first; a full join keeps rows that appear in either table.
# No `by` is given, so the join uses every column name the two tables share
# (presumably occupation and wiki_lang_code; confirm against the CSVs).
all_scores <- full_join(
  select(embedding_scores, -c(female_targets, male_targets)),
  human_ratings
)
# Scatter plot of embedding male score against human rating within each
# language, with a linear fit per panel.
all_scores %>%
  # The human rating is negated so that the y axis reads as a "male score",
  # matching the direction of the embedding score on the x axis.
  ggplot(aes(y = -mean_gender_rating, x = male_score)) +
  geom_point(aes(color = wiki_lang_code)) +
  xlab("embedding male score") +
  ylab("human male score") +
  geom_smooth(method = "lm") +
  # `scales` is spelled out in full rather than relying on partial argument
  # matching of `scale`; each panel gets its own x and y ranges.
  facet_wrap(~wiki_lang_code, scales = "free") +
  theme_classic() +
  theme(legend.position = "none")
# Pair human ratings with embedding scores across languages. The language
# column is renamed on both sides before joining, so it is no longer a
# shared key and the two languages of a pair may differ.
# The join uses whatever columns the tables still share (presumably only
# `occupation`; confirm against the CSVs).
full_df <- human_ratings %>%
  rename(human_wiki_lang_code = wiki_lang_code) %>%
  left_join(
    embedding_scores %>%
      select(-c(female_targets, male_targets)) %>%
      rename(embedding_wiki_lang_code = wiki_lang_code)
  ) %>%
  # Human ratings with no matching embedding row carry no score; drop them.
  filter(!is.na(embedding_wiki_lang_code)) %>%
  # Flag the pairs where ratings and embeddings come from the same language.
  mutate(same_lang = human_wiki_lang_code == embedding_wiki_lang_code)
# For each human-rating language (one panel each), draw a linear fit of
# human male score on embedding male score per embedding language.
# Line type marks whether the embedding language matches the panel's.
ggplot(
  data = full_df,
  mapping = aes(
    x = male_score,
    # Negated so the y axis reads as a "male score", as labelled below.
    y = -mean_gender_rating,
    group = embedding_wiki_lang_code,
    color = embedding_wiki_lang_code
  )
) +
  geom_smooth(aes(linetype = same_lang), method = "lm") +
  labs(x = "embedding male score", y = "human male score") +
  facet_wrap(~human_wiki_lang_code) +
  theme_classic()
# Heatmap of the correlation between embedding male score and negated human
# rating for every (human language, embedding language) pair.
full_df %>%
  group_by(human_wiki_lang_code, embedding_wiki_lang_code) %>%
  summarise(
    # Rows with a missing value in either variable are left out of the
    # correlation.
    cor = cor(male_score, -mean_gender_rating, use = "pairwise.complete.obs")
  ) %>%
  ggplot(
    aes(
      x = human_wiki_lang_code,
      y = embedding_wiki_lang_code,
      fill = cor
    )
  ) +
  geom_tile() +
  scale_fill_gradient(high = "red", low = "white") +
  theme_classic()
# Summarise, for each human-rating language, the correlations obtained with
# same-language versus other-language embeddings.
same_lang_corrs <- full_df %>%
  group_by(human_wiki_lang_code, embedding_wiki_lang_code) %>%
  # One correlation per (human language, embedding language) pair.
  summarise(
    cor = cor(male_score, -mean_gender_rating, use = "pairwise.complete.obs")
  ) %>%
  # The flag is recomputed because summarising keeps only the grouping
  # columns and `cor`.
  mutate(same_lang = human_wiki_lang_code == embedding_wiki_lang_code) %>%
  group_by(human_wiki_lang_code, same_lang) %>%
  # NOTE(review): multi_boot_standard is defined outside this file; it
  # presumably bootstraps a mean and confidence interval of `cor` per group.
  # The plot that follows reads `mean`, `ci_lower` and `ci_upper` from it.
  multi_boot_standard(col = "cor")
# Mean correlation, with its interval, per human-rating language, split by
# whether the embedding language matches the rating language.
ggplot(
  same_lang_corrs,
  # `group` was previously misspelled as `gorup`, which ggplot2 does not
  # recognise as an aesthetic.
  aes(x = human_wiki_lang_code, group = same_lang, color = same_lang)
) +
  geom_point(aes(y = mean), size = 4) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper)) +
  theme_classic()