Embedding scores:
# CSV file holding the embedding-derived scores.
EMBEDDING_SCORE_PATH <- "embedding_gender_bias_occupations_gabriel.csv"

# Read the file with explicit column names (so no line of the file is used as
# a header), then drop every row whose `occupation` field is literally
# "male_targets" or "female_targets".
embedding_scores <- EMBEDDING_SCORE_PATH %>%
  read_csv(
    col_names = c(
      "wiki_lang_code", "occupation", "female_targets",
      "male_targets", "male_score"
    )
  ) %>%
  filter(!occupation %in% c("female_targets", "male_targets"))

# Number of remaining rows per wiki_lang_code, rendered as a table.
embedding_scores %>%
  count(wiki_lang_code) %>%
  kable()
| wiki_lang_code | n |
|---|---|
| de | 121 |
| en | 125 |
| fr | 123 |
# Histogram of the embedding male_score, one panel per wiki_lang_code.
# The fill colour duplicates the facet variable, so the legend is hidden.
ggplot(embedding_scores, aes(male_score, fill = wiki_lang_code)) +
  geom_histogram() +
  facet_wrap(~ wiki_lang_code) +
  theme_classic() +
  theme(legend.position = "none")
Human ratings:
# Load the four human-norm files and tag each with the file it came from.
# S1 and S2 additionally get a constant `gender` column; S1g and S2g are
# left as read apart from the `type` tag.
# NOTE(review): `gender` is filled with the literal string "NA", not a
# missing value (NA_character_) -- confirm this is intended.
S1 <- "gabriel_norms_clean_S1.csv" %>%
  read_csv() %>%
  mutate(type = "S1", gender = "NA")

S1g <- "gabriel_norms_clean_S1g.csv" %>%
  read_csv() %>%
  mutate(type = "S1g")

# Rows of S2 without an occupation are discarded.
S2 <- "gabriel_norms_clean_S2.csv" %>%
  read_csv() %>%
  filter(!is.na(occupation)) %>%
  mutate(type = "S2", gender = "NA")

S2g <- "gabriel_norms_clean_S2g.csv" %>%
  read_csv() %>%
  mutate(type = "S2g")

# Stack the four tables into one long table of ratings.
human_ratings <- bind_rows(S1, S1g, S2, S2g)

# Row counts per wiki_lang_code and source type, rendered as a table.
human_ratings %>%
  count(wiki_lang_code, type) %>%
  kable()
| wiki_lang_code | type | n |
|---|---|---|
| de | S1 | 126 |
| de | S1g | 252 |
| de | S2 | 126 |
| de | S2g | 252 |
| en | S1 | 126 |
| en | S1g | 252 |
| en | S2 | 126 |
| en | S2g | 252 |
| fr | S1 | 126 |
| fr | S1g | 252 |
| fr | S2 | 126 |
| fr | S2g | 252 |
# Overlaid, semi-transparent densities of human_mean_rating for each `type`,
# one panel per wiki_lang_code; the legend is hidden.
ggplot(human_ratings, aes(human_mean_rating, fill = type)) +
  geom_density(alpha = .4) +
  facet_wrap(~ wiki_lang_code) +
  theme_classic() +
  theme(legend.position = "none")
# Combine embedding scores with human ratings, matching rows on both
# wiki_lang_code and occupation, and keep only rows that have a human rating.
# The join keys are spelled out instead of relying on dplyr's implicit
# "all shared columns" default, so an extra shared column can never silently
# change the match.
# NOTE(review): assumes wiki_lang_code and occupation are the only columns the
# human-rating files share with the embedding table -- confirm against the CSVs.
all_scores <- embedding_scores %>%
  select(-female_targets, -male_targets) %>%
  full_join(human_ratings, by = c("wiki_lang_code", "occupation")) %>%
  filter(!is.na(human_mean_rating)) # TODO: fix aupairs!
# Human rating against embedding score, with rows of panels by `type` and
# columns by wiki_lang_code. Points take their colour from wiki_lang_code
# (the layer-level aes overrides the plot-level one); the linear fits keep
# the plot-level colour and grouping by gender. The legend is hidden.
all_scores %>%
  ggplot(aes(x = male_score, y = human_mean_rating,
             color = gender, group = gender)) +
  geom_point(aes(color = wiki_lang_code)) +
  geom_smooth(method = "lm") +
  xlab("embedding male score") +
  ylab("human male score") +
  # `scales` is the actual argument name; the previous `scale =` only worked
  # through partial argument matching.
  facet_grid(type ~ wiki_lang_code, scales = "free") +
  theme_classic() +
  theme(legend.position = "none")
# Pair every S1/S2 human rating with the embedding scores for the same
# occupation. The language column is renamed on both sides before the join,
# so the match is on occupation alone and each rating can be paired with
# scores from any embedding language. `same_lang` marks the pairs where the
# rating language and the embedding language coincide.
# The key is given explicitly rather than left to the implicit
# shared-columns default.
# NOTE(review): assumes `occupation` is the only column shared by the two
# tables after the renames -- confirm against the CSVs. Newer dplyr releases
# may warn about a many-to-many match here; if so, declare it with
# `relationship = "many-to-many"`.
embedding_by_lang <- embedding_scores %>%
  select(-female_targets, -male_targets) %>%
  rename(embedding_wiki_lang_code = wiki_lang_code)

full_df <- human_ratings %>%
  filter(type %in% c("S1", "S2")) %>%
  rename(human_wiki_lang_code = wiki_lang_code) %>%
  left_join(embedding_by_lang, by = "occupation") %>%
  mutate(same_lang = human_wiki_lang_code == embedding_wiki_lang_code)
# Linear fits of human rating on embedding score, one line per embedding
# language. Panels: rows by `type`, columns by the language of the human
# ratings. Line type distinguishes fits where the embedding language matches
# the rating language from those where it does not.
ggplot(
  full_df,
  aes(
    x = male_score,
    y = human_mean_rating,
    color = embedding_wiki_lang_code,
    group = embedding_wiki_lang_code
  )
) +
  geom_smooth(aes(linetype = same_lang), method = "lm") +
  facet_grid(type ~ human_wiki_lang_code) +
  labs(x = "embedding male score", y = "human male score") +
  theme_classic()