# Directory of gender-rating files; every file found in it is read below.
MODELS <- here("exploratory_studies/15_udbank/gender_ratings/")

# Read each ratings file with fread() and row-bind the results, then derive
# the training configuration from the `model` label.
all_model_ratings <- map_df(list.files(MODELS, full.names = TRUE), fread) %>%
  mutate(
    # Order matters: "5ep" is also a substring of "15ep", so the more
    # specific labels must be tested first. Labels matching none of these
    # patterns get NA.
    epochs = case_when(
      str_detect(model, "15ep") ~ 15,
      str_detect(model, "30ep") ~ 30,
      str_detect(model, "50ep") ~ 50,
      str_detect(model, "5ep") ~ 5
    ),
    # Kept as a character label (used later as a facet / colour grouping).
    # NOTE(review): presumably the embedding dimensionality -- confirm.
    size = case_when(
      str_detect(model, "300d") ~ "300",
      str_detect(model, "200d") ~ "200"
    )
  ) %>%
  select(epochs, size, word, male_score)
# Path to the raw Glasgow Norms file.
GENDER_NORMS <- here("data/study1a/raw/GlasgowNorms.csv")

# Human gender ratings keyed by lower-cased word.
# NOTE(review): GEND_M is presumably the mean gender rating -- confirm
# against the norms' codebook.
glasgow_norms <- read_csv(GENDER_NORMS) %>%
  select(word, human_gender_rating = GEND_M) %>%
  # Keep only the first space-delimited token of each entry. This is the
  # vectorised equivalent of the former rowwise() + str_split(...)[1].
  mutate(word = tolower(str_extract(word, "^[^ ]*"))) %>%
  # Drops exact duplicate (word, rating) rows only. Entries that collapse to
  # the same token but carry different ratings remain as separate rows.
  distinct()
# Attach the human gender rating to every model rating. Words without a
# match in the norms are kept with an NA human_gender_rating (left join).
# `word` is the only column the two tables share; naming it explicitly
# avoids the "Joining, by = ..." message and guards against an accidental
# key change if either table gains a column.
all_ratings <- all_model_ratings %>%
  left_join(glasgow_norms, by = "word")
# Model male_score against human gender rating, with a linear fit, in a grid
# of panels: one row per `epochs` value and one column per `size` value.
all_ratings %>%
  ggplot(mapping = aes(x = human_gender_rating, y = male_score)) +
  facet_grid(epochs ~ size) +
  geom_point(size = .2, alpha = .2) +
  geom_smooth(method = "lm") +
  theme_bw()

# Correlation (cor.test) between human and model gender scores, computed
# separately for each (epochs, size) configuration and tidied into one row
# per configuration.
cor_values <- all_ratings %>%
  group_by(epochs, size) %>%
  nest() %>%
  mutate(
    temp = map(
      data,
      ~ tidy(cor.test(.x$human_gender_rating, .x$male_score))
    )
  ) %>%
  select(-data) %>%
  # Name the list-column explicitly: a bare unnest() is deprecated and warns.
  unnest(temp) %>%
  # nest() on a grouped frame can leave the result grouped; return a plain
  # table so later steps are not silently applied per group.
  ungroup()
kable(cor_values)
| epochs | size | estimate | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|-------:|:-----|---------:|----------:|--------:|----------:|---------:|----------:|:-------|:------------|
| 15 | 200 | 0.1103832 | 5.854764 | 0.00e+00 | 2779 | 0.0735158 | 0.1469494 | Pearson’s product-moment correlation | two.sided |
| 30 | 200 | 0.1103498 | 5.852967 | 0.00e+00 | 2779 | 0.0734821 | 0.1469162 | Pearson’s product-moment correlation | two.sided |
| 50 | 200 | 0.1064774 | 5.645176 | 0.00e+00 | 2779 | 0.0695837 | 0.1430802 | Pearson’s product-moment correlation | two.sided |
| 5 | 200 | 0.0831735 | 4.399839 | 1.12e-05 | 2779 | 0.0461471 | 0.1199717 | Pearson’s product-moment correlation | two.sided |
| 15 | 300 | 0.1122461 | 5.954820 | 0.00e+00 | 2779 | 0.0753915 | 0.1487944 | Pearson’s product-moment correlation | two.sided |
| 5 | 300 | 0.0746779 | 3.947760 | 8.08e-05 | 2779 | 0.0376132 | 0.1115374 | Pearson’s product-moment correlation | two.sided |
# Correlation estimate as a function of training epochs, with one connected
# line (and colour) per `size` value.
cor_values %>%
  ggplot(
    mapping = aes(x = epochs, y = estimate, group = size, color = size)
  ) +
  geom_point() +
  geom_line() +
  theme_bw()

# Target word lists for the career vs. family comparison.
CAREER_WORDS <- c(
  "career", "executive", "management", "professional",
  "corporation", "salary", "office", "business"
)
FAMILY_WORDS <- c(
  "family", "home", "parents", "children",
  "cousins", "marriage", "wedding", "relatives"
)
# NOTE(review): the original note here singled out "parents" and "cousins"
# without saying why -- confirm what needs attention for these two words.

# Bootstrapped mean male_score (with confidence interval) for each word
# category within each (epochs, size) configuration.
all_ratings %>%
  # Label each word with its category; words in neither list get NA ...
  mutate(
    career_word_type = case_when(
      word %in% CAREER_WORDS ~ "career",
      word %in% FAMILY_WORDS ~ "family"
    )
  ) %>%
  # ... and are dropped, leaving only the career and family target words.
  filter(!is.na(career_word_type)) %>%
  group_by(epochs, size, career_word_type) %>%
  multi_boot_standard(col = "male_score")
## # A tibble: 12 x 6
## # Groups: epochs, size, career_word_type [12]
## epochs size career_word_type ci_lower ci_upper mean
## <dbl> <chr> <chr> <dbl> <dbl> <dbl>
## 1 5 200 career -0.0519 -0.00265 -0.0285
## 2 5 200 family -0.0768 0.0124 -0.0334
## 3 5 300 career -0.0529 -0.00205 -0.0304
## 4 5 300 family -0.0740 0.0127 -0.0324
## 5 15 200 career -0.0394 0.00947 -0.0175
## 6 15 200 family -0.0762 0.0114 -0.0303
## 7 15 300 career -0.0411 0.00699 -0.0201
## 8 15 300 family -0.0806 -0.0000623 -0.0403
## 9 30 200 career -0.0279 0.00304 -0.0131
## 10 30 200 family -0.0543 0.0171 -0.0213
## 11 50 200 career -0.0272 -0.00149 -0.0146
## 12 50 200 family -0.0561 0.00726 -0.0240