# Directory holding one ratings file per trained model.
MODELS <- here("exploratory_studies/15_udbank/gender_ratings/")

# Read every file in MODELS, stack the rows, and derive the training
# configuration (epochs, embedding size) from the `model` name column.
# Model names matching none of the tags below get NA for that field.
all_model_ratings <- map_df(list.files(MODELS, full.names = TRUE), fread) %>%
  mutate(
    # "5ep" is a substring of "15ep", so it must be tested last.
    epochs = case_when(
      str_detect(model, "15ep") ~ 15,
      str_detect(model, "30ep") ~ 30,
      str_detect(model, "50ep") ~ 50,
      str_detect(model, "5ep") ~ 5
    ),
    # Size is kept as a character label, not a number.
    size = case_when(
      str_detect(model, "300d") ~ "300",
      str_detect(model, "200d") ~ "200"
    )
  ) %>%
  select(epochs, size, word, male_score)

# Human gender norms: one rating (GEND_M) per word.
GENDER_NORMS <- here("data/study1a/raw/GlasgowNorms.csv")

# Keep the word and its rating, reduce each word entry to its first
# space-separated token (lower-cased), and drop exact duplicate rows.
# The first-token step is vectorized; no rowwise() pass is needed.
glasgow_norms <- read_csv(GENDER_NORMS) %>%
  select(word, GEND_M) %>%
  rename(human_gender_rating = GEND_M) %>%
  mutate(
    # Remove everything from the first space onward (dotall so the remainder
    # is dropped even if it contains a newline), leaving the first token.
    word = str_remove(word, regex(" .*", dotall = TRUE)),
    word = tolower(word)
  ) %>%
  # NOTE(review): distinct() only removes rows whose word AND rating are both
  # identical. If several entries collapse to the same first token with
  # different ratings, that word stays duplicated here -- confirm whether
  # that is intended before joining on `word`.
  distinct()

# Attach the human rating to every model rating. A left join keeps all model
# rows; words without a norm get NA for human_gender_rating. The key is named
# explicitly so the join cannot silently pick up other shared columns.
all_ratings <- all_model_ratings %>%
  left_join(glasgow_norms, by = "word")

# Model male_score against the human gender rating: raw points plus a linear
# fit, with one panel per epochs (rows) x size (columns) combination.
all_ratings %>%
  ggplot(mapping = aes(x = human_gender_rating, y = male_score)) +
  geom_point(size = 0.2, alpha = 0.2) +
  geom_smooth(method = "lm") +
  facet_grid(epochs ~ size) +
  theme_bw()

# Correlation test between human and model ratings, run separately for each
# (epochs, size) model configuration; one tidy result row per configuration.
cor_values <- all_ratings %>%
  group_by(epochs, size) %>%
  nest() %>%
  mutate(
    temp = map(
      data,
      ~ tidy(cor.test(.x$human_gender_rating, .x$male_score))
    )
  ) %>%
  select(-data) %>%
  # Name the list-column explicitly: a bare unnest() is deprecated.
  unnest(temp) %>%
  # Do not leave the result grouped for downstream code.
  ungroup()


# Render the per-configuration correlation results as a table.
cor_values %>%
  kable()
## | epochs | size | estimate  | statistic | p.value  | parameter | conf.low  | conf.high | method                               | alternative |
## |-------:|:-----|----------:|----------:|---------:|----------:|----------:|----------:|:-------------------------------------|:------------|
## |     15 | 200  | 0.1103832 |  5.854764 | 0.00e+00 |      2779 | 0.0735158 | 0.1469494 | Pearson's product-moment correlation | two.sided   |
## |     30 | 200  | 0.1103498 |  5.852967 | 0.00e+00 |      2779 | 0.0734821 | 0.1469162 | Pearson's product-moment correlation | two.sided   |
## |     50 | 200  | 0.1064774 |  5.645176 | 0.00e+00 |      2779 | 0.0695837 | 0.1430802 | Pearson's product-moment correlation | two.sided   |
## |      5 | 200  | 0.0831735 |  4.399839 | 1.12e-05 |      2779 | 0.0461471 | 0.1199717 | Pearson's product-moment correlation | two.sided   |
## |     15 | 300  | 0.1122461 |  5.954820 | 0.00e+00 |      2779 | 0.0753915 | 0.1487944 | Pearson's product-moment correlation | two.sided   |
## |      5 | 300  | 0.0746779 |  3.947760 | 8.08e-05 |      2779 | 0.0376132 | 0.1115374 | Pearson's product-moment correlation | two.sided   |
# Correlation estimate as a function of training epochs, with one coloured
# line (and its points) per embedding size.
cor_values %>%
  ggplot(
    mapping = aes(x = epochs, y = estimate, colour = size, group = size)
  ) +
  geom_point() +
  geom_line() +
  theme_bw()

# Target word lists used to compare model gender scores for career-related
# versus family-related words.
CAREER_WORDS <- c(
  "career", "executive", "management", "professional",
  "corporation", "salary", "office", "business"
)
FAMILY_WORDS <- c(
  "family", "home", "parents", "children",
  "cousins", "marriage", "wedding", "relatives"
)

# parents and cousins
# NOTE(review): the note above is not explained in this file -- presumably it
# flags those two family words for special attention; confirm with the author.
#
# Bootstrapped mean male_score (with confidence interval) for career versus
# family words, computed separately for each model configuration.
all_ratings %>%
  filter(word %in% CAREER_WORDS | word %in% FAMILY_WORDS) %>%
  mutate(
    # After the filter every row is in exactly one of the two lists.
    career_word_type = if_else(word %in% CAREER_WORDS, "career", "family")
  ) %>%
  group_by(epochs, size, career_word_type) %>%
  multi_boot_standard(col = "male_score")
## # A tibble: 12 x 6
## # Groups:   epochs, size, career_word_type [12]
##    epochs size  career_word_type ci_lower   ci_upper    mean
##     <dbl> <chr> <chr>               <dbl>      <dbl>   <dbl>
##  1      5 200   career            -0.0519 -0.00265   -0.0285
##  2      5 200   family            -0.0768  0.0124    -0.0334
##  3      5 300   career            -0.0529 -0.00205   -0.0304
##  4      5 300   family            -0.0740  0.0127    -0.0324
##  5     15 200   career            -0.0394  0.00947   -0.0175
##  6     15 200   family            -0.0762  0.0114    -0.0303
##  7     15 300   career            -0.0411  0.00699   -0.0201
##  8     15 300   family            -0.0806 -0.0000623 -0.0403
##  9     30 200   career            -0.0279  0.00304   -0.0131
## 10     30 200   family            -0.0543  0.0171    -0.0213
## 11     50 200   career            -0.0272 -0.00149   -0.0146
## 12     50 200   family            -0.0561  0.00726   -0.0240