# Directory of gender-rating files produced by the embedding models.
MODELS <- here("exploratory_studies/13_europarl/gender_ratings/")

# Read every file in MODELS, stack the rows, and derive two columns from the
# `model` name:
#   * epochs     - the text between the first "en_" and the following "ep",
#                  kept as a character string (NA when "en_" is absent);
#   * model_type - "translated" or "untranslated"; NA for any other name.
all_model_ratings <- map_df(list.files(MODELS, full.names = TRUE), fread) %>%
  mutate(
    # Vectorised extraction: second piece after splitting on "en_", then the
    # first piece of that after splitting on "ep".
    epochs = map_chr(str_split(model, "en_"), 2, .default = NA_character_),
    epochs = map_chr(str_split(epochs, "ep"), 1, .default = NA_character_),
    # case_when() takes the first matching branch, so a name containing both
    # patterns is labelled "translated".
    model_type = case_when(
      str_detect(model, "trans_f") ~ "translated",
      str_detect(model, "untrans") ~ "untranslated"
    )
  ) %>%
  select(model_type, epochs, word, male_score)
# Path to the raw Glasgow Norms file.
GENDER_NORMS <- here("data/study1a/raw/GlasgowNorms.csv")

# Human gender ratings: the GEND_M column, renamed to human_gender_rating and
# keyed by the lower-cased first space-delimited token of each `word` entry.
# NOTE: distinct() only drops fully identical rows, so entries that share a
# first token but carry different ratings remain as separate rows.
glasgow_norms <- read_csv(GENDER_NORMS) %>%
  select(word, human_gender_rating = GEND_M) %>%
  mutate(
    # Vectorised replacement for the former rowwise() split: column 1 of the
    # split matrix is everything before the first space.
    word = str_split_fixed(word, " ", n = 2)[, 1],
    word = tolower(word)
  ) %>%
  distinct()
# Attach the human rating to every model rating. `word` is the only column the
# two tables share, so it is named explicitly as the join key; model rows whose
# word has no norm keep an NA human_gender_rating.
all_ratings <- all_model_ratings %>%
  left_join(glasgow_norms, by = "word")
# Scatter of model male_score against the human rating with a linear fit,
# laid out as a grid: one row per epochs value, one column per model_type.
all_ratings %>%
  ggplot(mapping = aes(x = human_gender_rating, y = male_score)) +
  geom_point(size = 0.2, alpha = 0.2) +
  geom_smooth(method = "lm") +
  theme_bw() +
  facet_grid(epochs ~ model_type)

# Correlation test between human and model gender ratings, computed separately
# for every model_type x epochs combination. Each group's data is nested, passed
# to cor.test(), and the tidied result is expanded back into ordinary columns.
cor_values <- all_ratings %>%
  mutate(epochs = as.numeric(epochs)) %>%
  group_by(model_type, epochs) %>%
  nest() %>%
  mutate(
    temp = map(
      data,
      ~ tidy(cor.test(.x$human_gender_rating, .x$male_score))
    )
  ) %>%
  select(-data) %>%
  # Name the list column explicitly; a bare unnest() is deprecated.
  unnest(temp) %>%
  # Drop the leftover grouping so later steps see a plain table.
  ungroup()
kable(cor_values)
| model_type | epochs | estimate | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|:---|---:|---:|---:|---:|---:|---:|---:|:---|:---|
| translated | 10 | 0.1337143 | 8.903065 | 0.0e+00 | 4354 | 0.1044310 | 0.1627658 | Pearson’s product-moment correlation | two.sided |
| translated | 20 | 0.1117646 | 7.421269 | 0.0e+00 | 4354 | 0.0823400 | 0.1409946 | Pearson’s product-moment correlation | two.sided |
| translated | 5 | 0.1340506 | 8.925870 | 0.0e+00 | 4354 | 0.1047698 | 0.1630992 | Pearson’s product-moment correlation | two.sided |
| untranslated | 10 | 0.0802058 | 4.949089 | 8.0e-07 | 3783 | 0.0484700 | 0.1117798 | Pearson’s product-moment correlation | two.sided |
| untranslated | 20 | 0.0726241 | 4.478651 | 7.7e-06 | 3783 | 0.0408590 | 0.1042425 | Pearson’s product-moment correlation | two.sided |
| untranslated | 5 | 0.1023304 | 6.327158 | 0.0e+00 | 3783 | 0.0707013 | 0.1337539 | Pearson’s product-moment correlation | two.sided |
# Correlation estimate as a function of training epochs, one panel per
# model_type, drawn as points joined by a line.
cor_values %>%
  ggplot(mapping = aes(x = epochs, y = estimate, group = model_type)) +
  geom_point() +
  geom_line() +
  facet_wrap(~model_type) +
  theme_bw()
